// OpenTxl-C Version 11 scanner
// J.R. Cordy, Jan 2023

// Copyright 2023, James R. Cordy and others

// Permission is hereby granted, free of charge, to any person obtaining a copy of this software 
// and associated documentation files (the “Software”), to deal in the Software without restriction, 
// including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
// and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
// subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all copies 
// or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE 
// AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// The TXL input scanner.
// Reads input as text and breaks it into an array of tokens, as defined by the TXL or object language grammar.

// Modification Log

// v11.0 Initial revision, adapted from OpenTxl 11.0

// v11.3 Fixed lookahead source line number bug.
//       Fixed multiple nl-comments source line number bug.

// I/O, strings, memory allocation
#include "support.h"

// Global modules
#include "locale.h"
#include "limits.h"
#include "options.h"
#include "tokens.h"
#include "errors.h"
#include "shared.h"
#include "charset.h"
#include "idents.h"

// Check interface consistency
#include "scan.h"

// Compound literal tokens
// One entry of the compound token table: the text of the compound and a length field
// (presumably the number of characters in literal; it is filled in by code not visible here - TODO confirm)
struct scanner_compoundT {
    int    length;
    string literal;
};

// Compound literal tokens - 1-origin [1 .. maxCompoundTokens]
// Slot 0 is not used by the 1-origin indexing, and one additional slot past maxCompoundTokens is reserved
struct scanner_compoundT scanner_compoundTokens[maxCompoundTokens + 2];     // (sic) - leave room for end marker
int scanner_nCompounds; // 0 - file scope objects are zero initialized

// Mapping from char to potential compound tokens
// Indexed by character code; has ASCII entries
int scanner_compoundIndex[ASCII];

// Comment brackets - 1-origin [1 .. maxCommentTokens]
// Parallel arrays: entry i of each presumably describes the same comment convention - TODO confirm
tokenT scanner_commentStart[maxCommentTokens + 1];
tokenT scanner_commentEnd[maxCommentTokens + 1];
int scanner_nComments;  // 0 - file scope objects are zero initialized

// Token pattern table
// Holds both TXL and object language token patterns

// One element of an encoded token pattern; wide enough for the reserved pattern codes,
// which are defined as offsets from PATTERN
typedef short scanner_patternCodeT;  // 0 .. 1000
// An encoded token pattern, a fixed size array of pattern codes
// 1-origin [1 .. maxStringLength]
typedef scanner_patternCodeT scanner_patternT[maxStringLength + 1];

// One token pattern: the tree kind and name it is entered under, its encoded pattern,
// and a length (passed as the end position when the pattern is scanned in scanner_openFile)
struct scanner_patternEntryT {
    enum treeKindT kind;
    tokenT name;
    scanner_patternT pattern;
    int length;
};

// Structure:
//      pi=patternIndex[c] -> pl=patternLink[pi,pi+1,pi+2 ... ] (pl != 0) -> tokenPatterns[pl]
// That is, the patterns that can begin with character c are found by starting at link patternIndex[c]
// and following consecutive patternLink entries until a 0 entry is reached

// 1-origin [1 .. maxTokenPatterns]
struct scanner_patternEntryT scanner_tokenPatterns[maxTokenPatterns + 1];
int scanner_nPatterns;              // 0
int scanner_nPredefinedPatterns;    // 0

// map to first pattern link for each starting char
// Indexed by character code; 0 means no link
int scanner_patternIndex[ASCII];

// pattern link for the special pattern for NL comments, if any
int scanner_patternNLCommentIndex;  // 0

// ordered sublists of links to alternative pattern entries, ended by 0
// 1-origin [1 .. maxTokenPatternLinks]
int scanner_patternLink[maxTokenPatternLinks + 1];
int scanner_nPatternLinks;  // 0

// next free user token kind
// The trailing comment names the intended starting value; a file scope object is zero initialized,
// so it is presumably assigned by initialization code not visible here - TODO confirm
enum treeKindT scanner_nextUserTokenKind;  // firstUserTokenKind

// Magic characters
// These are stored in the input text buffer itself to mark its logical ends
#define EOF_ '\004'     // end of file, represented by ASCII/Unicode EOT
#define EOS  '\0'       // end of string, ASCII/Unicode NUL

// Token pattern regexp codes - these integer codes must not conflict with
// useful characters in extended ASCII or Unicode
#define EOSPAT 0        // end of an encoded pattern

// Ranges used in standard 16-bit Unicode
//    00 .. FF          (000 .. 255):           ASCII
// 01 00 .. 01 7F       (256 .. 383):           Latin Extended-A
// 01 80 .. 02 4F       (384 .. 591):           Latin Extended-B
// 1E 02 .. 1E F3       (7682 .. 7923):         Latin Extended Additional
// 2C 60 .. 2C 7F       (11360 .. 11391):       Latin Extended-C
// A7 20 .. A7 FF       (42784 .. 43007):       Latin Extended-D
// AB 30 .. AB 6F       (43824 .. 43887):       Latin Extended-E

// Automatically recognizing these, since they don't conflict with ASCII
// For the first few UTF-16 codes compatible with ASCII, we automatically change any two byte
// Unicode character C beginning with these to sequence pattern (C)
// The values below are the leading byte values of the two byte characters concerned
#define UNICODEA  (0x01)
#define UNICODEB  (0x02)
#define UNICODEN  (0x08)                // ASCII BS - leading bytes after this one would conflict with TAB
#define UNICODEX  (0x1E)

// Not automatically supporting these yet, since they conflict
// #define UNICODEC  0x2C
// #define UNICODED  0xA7
// #define UNICODEF  0xAB

// Reserved TXL Pattern Codes - closest to 0 avoiding above
// Base value for all of the codes that follow; the largest one defined here is PATTERN + 47
#define PATTERN  600

// meta-character classes and special characters, \n, \t, \d, \a, ...
#define NEWLINE         (PATTERN + 0)
#define TAB             (PATTERN + 1)
#define DIGIT           (PATTERN + 2)
#define ALPHA           (PATTERN + 3)
#define ID              (PATTERN + 4)
#define UPPER           (PATTERN + 5)
#define UPPERID         (PATTERN + 6)
#define LOWER           (PATTERN + 7)
#define LOWERID         (PATTERN + 8)
#define SPECIAL         (PATTERN + 9)
#define ANY             (PATTERN + 10)
#define ALPHAID         (PATTERN + 11)
#define RETURN          (PATTERN + 12)

// complements of above, #t, #n, #d, #a, ...
// Each complement code is the corresponding class code plus 20
#define NOTNEWLINE      (PATTERN + 20)
#define NOTTAB          (PATTERN + 21)
#define NOTDIGIT        (PATTERN + 22)
#define NOTALPHA        (PATTERN + 23)
#define NOTID           (PATTERN + 24)
#define NOTUPPER        (PATTERN + 25)
#define NOTUPPERID      (PATTERN + 26)
#define NOTLOWER        (PATTERN + 27)
#define NOTLOWERID      (PATTERN + 28)
#define NOTSPECIAL      (PATTERN + 29)
#define NOTANY          (PATTERN + 30)
#define NOTALPHAID      (PATTERN + 31)
#define NOTRETURN       (PATTERN + 32)

// #... negated pattern
#define NOT             (PATTERN + 40)

// [...], #[...] choice, negated choice
#define CHOICE          (PATTERN + 41)
#define NOTCHOICE       (PATTERN + 42)

// (...), #(...) sequence, negated sequence
#define SEQUENCE        (PATTERN + 43)
#define NOTSEQUENCE     (PATTERN + 44)

// :..., #:... lookahead, negated lookahead
#define LOOKAHEAD       (PATTERN + 45)
#define NOTLOOKAHEAD    (PATTERN + 46)

// \\... escaped character
#define ESCAPE          (PATTERN + 47)

// Regexp meta-character to pattern code maps
// Three parallel 1-origin tables [1 .. nPatternChars]; slot 0 of each holds UNUSED as filler.
// For each index i, scanner_patternChars[i] is a meta-character letter, scanner_patternCodes[i] is the
// pattern code for that letter, and scanner_patternNotCodes[i] is the code for its complement.
// NOTE(review): the complement entry for 'c' (ANY) is EOSPAT rather than NOTANY - this looks deliberate
// (the complement of "any character" can match nothing), but confirm against the pattern compiler
#define nPatternChars 13
static const char scanner_patternChars[nPatternChars + 1] = 
    {UNUSED, 'd', 'a', 'u', 'i', 'A', 'I', 'b', 'j', 's', 'c', 'n', 'r', 't'};
static const scanner_patternCodeT scanner_patternCodes[nPatternChars + 1] = 
    {UNUSED, DIGIT, ALPHA, ALPHAID, ID, UPPER, UPPERID, LOWER, LOWERID, SPECIAL, ANY, NEWLINE, RETURN, TAB};
static const scanner_patternCodeT scanner_patternNotCodes[nPatternChars + 1] = 
    {UNUSED, NOTDIGIT, NOTALPHA, NOTALPHAID, NOTID, NOTUPPER, NOTUPPERID, NOTLOWER, NOTLOWERID, NOTSPECIAL, EOSPAT, NOTNEWLINE, NOTRETURN, NOTTAB};

// Keyword table
// Holds both TXL and object language keyword tokens - 1-origin [1 .. maxKeys]
// Entries [1 .. scanner_nKeys] are binary searched by scanner_keyP, so they must be kept in ascending order
tokenT scanner_keywordTokens[maxKeys + 1];
int scanner_nKeys;              // 0 - number of entries in use
// Presumably the number of leading entries that are TXL keywords, and the most recently
// matched or installed entry; both are maintained by code not visible here - TODO confirm
static int scanner_nTxlKeys;    // 0
static int scanner_lastKey;     // 0

// Test whether a token is a keyword.
// Binary searches scanner_keywordTokens [1 .. scanner_nKeys], which must be in ascending token order.
// Returns true if and only if the token is present in that part of the table.
bool scanner_keyP (const tokenT token)
{
    int first = 1;
    int last = scanner_nKeys;

    while (first <= last) {
        // Probe the middle of the remaining range
        const int probe = first + ((last - first) / 2);
        const tokenT candidate = scanner_keywordTokens[probe];

        if (candidate == token) {
            return (true);
        }

        // Discard the half of the range that cannot contain the token
        if (token < candidate) {
            last = probe - 1;
        } else {
            first = probe + 1;
        }
    }

    // Range exhausted without a match
    return (false);
}

// Current input file
// Stream number of the file currently being read. The trailing comment names the intended initial
// value; a file scope object is zero initialized, and the value is assigned in scanner_openFile
static int scanner_inputStream;  // tfstdin

// TXL source file include facility
// Saved reading position of a source file that has been suspended by an include:
// its stream, its file number and the line number reached
struct scanner_includeStackEntry {
    int file;
    int filenum;
    int linenum;
};

// 1-origin [1 .. maxIncludeDepth]
static struct scanner_includeStackEntry scanner_includeStack[maxIncludeDepth + 1];
static int scanner_includeDepth;  // 0 - number of suspended files on the stack

// Directory context for includes
// Directory part of the main source file name, including its trailing separator;
// prefixed to include file names in scanner_PushInclude
static string scanner_sourceFileDirectory;  // ""

// Input text buffer
#define inputBufferFactor 2     // must be >= 2, doesn't have to be big any more

// Use variable buffer size, to force dynamic allocation for efficiency
static int scanner_lineBufferSize;  // maxLineLength*inputBufferFactor + maxStringLength + 1

// The text currently being scanned, 1-origin, ended by EOS
static array (char, scanner_inputline); // ""

static string scanner_nextinputline;    // ""
static int scanner_nextlength;          // 0
// 1-origin index in scanner_inputline of the next character to be scanned
static int scanner_inputchar;

// Current input line and file number
// scanner_filenum indexes fileNames; the two are combined as (filenum * maxLines) + linenum
// when a token is installed
static int scanner_filenum;
static int scanner_linenum;

// Kind of source file - TXL source must be scanned specially due to its multi-language context
static bool scanner_txlSource;
// True when reading from a file or standard input, false when scanning the text in scanner_sourceText
static bool scanner_fileInput;

// Text buffer for [parse] predefined function -
// length must match the type longstring in predef.i
static char scanner_sourceText[maxLineLength + maxStringLength + 1];    // for string type cheats

// Only give the max input lines warning once
static bool scanner_warnedLines;  // false

// Add a scanned token to the inputTokens array
static void scanner_installToken (const enum treeKindT kind, const tokenT token, const tokenT rawtoken)
{
    lastTokenIndex += 1;

    if (lastTokenIndex >= maxTokens) {  // (sic)
        string message;
        stringprintf (message, "Input too large (total length > %d tokens) (a larger size is required for this input)", maxTokens);
        error ("", message, LIMIT_FATAL, 141);
    }

    if (scanner_linenum > maxLines) {
        if (!scanner_warnedLines) {
            string context, message;
            stringprintf (context, "line %d of %s", scanner_linenum + 1, fileNames[scanner_filenum]);
            stringprintf (message, "Input file too long (> %d lines) (a larger size should be used for this input)", maxLines);
            error (context, message, LIMIT_WARNING, 142);
            scanner_warnedLines = true;
        }
    }

    struct tokenTableT *inputToken = &(inputTokens[lastTokenIndex]);
    inputToken->token = token;
    inputToken->rawtoken = rawtoken;
    inputToken->kind = kind;
    inputToken->linenum = (scanner_filenum * maxLines) + scanner_linenum;

    ident_setKind (token, kind);
    ident_setKind (rawtoken, kind);

    // Debugging output from -Dtokens
    if ((options_option[tokens_p]) && (!scanner_txlSource)) {
        fprintf (stderr, "<");
        if (lstringchar (*ident_idents[kindType[kind]], 1) == '*') {
            fprintf (stderr, "%s", &(lstringchar (*ident_idents[kindType[kind]], 2)));
        } else {
            fprintf (stderr, "%s", *ident_idents[kindType[kind]]);
        }

        // fprintf (stderr, " value=%d,", (unsigned char) *ident_idents[rawtoken]);
        fprintf (stderr, " text=\"");
        charset_putXmlCode (0, *ident_idents[rawtoken]);
        fprintf (stderr, "\"/>\n");
    }
}

// Get the next buffer of input text from the input source into scanner_inputline,
// and reset scanner_inputchar to the first character of the buffer.
//
// Three cases are handled:
//  - file input with the multiline option on and not TXL source: whole lines, each followed by "\n",
//    are appended after whatever text is already in scanner_inputline, until the buffer holds more than
//    maxLineLength * (inputBufferFactor - 1) characters or the stream is at end of file
//  - any other file input, including all TXL source: exactly one line is read, followed by "\n"
//  - string input (scanner_fileInput false): the text in scanner_sourceText is delivered once, followed by "\n"
//
// End of input is delivered as a buffer holding only the character EOF_.
// In the file cases a read of exactly maxStringLength characters is taken to be the first part of a longer
// line, and the rest of the line is read in further pieces; a line of more than maxLineLength characters
// (or any such long line in TXL source) is a fatal limit error.
// This function does not change scanner_linenum - callers do their own line counting.
static void scanner_getInputLine (void) {

    if (scanner_fileInput) {

        if ((options_option[multiline_p]) && (!scanner_txlSource)) {

            // Object language input can have multiline tokens, so we buffer many lines of text at once
            // Default for object languages

            if (tfeof (scanner_inputStream)) {

                // End of input
                // Only signalled once the buffer is empty, so that any text still in it is scanned first
                if (lstringlen (scanner_inputline) == 0) {
                    lstringchar (scanner_inputline, 1) = EOF_;
                    lstringchar (scanner_inputline, 2) = EOS;
                }

            } else {

                // Refill buffer
                // lengthSoFar is the number of characters in the buffer, so new text goes at lengthSoFar + 1
                int lengthSoFar = lstringlen (scanner_inputline);

                while (true) {
                    // Stop once the buffer is full enough
                    if (lengthSoFar > (maxLineLength * (inputBufferFactor - 1))) break;

                    // Read the next line directly onto the end of the buffer
                    tfgetstring (&(stringchar (scanner_inputline, lengthSoFar + 1)), scanner_inputStream);

                    int bufferlength = stringlen (&(stringchar (scanner_inputline, lengthSoFar + 1)));

                    if (bufferlength == maxStringLength) {
                        // Got a partial long line; keep reading until we hit the end of it
                        // nextIndex is the index of the last character of the line read so far
                        int nextIndex = lengthSoFar + bufferlength;
                        while (true) {
                            string buffer;
                            tfgetstring (buffer, scanner_inputStream);
                            bufferlength = stringlen (buffer);

                            // Length of this one line, including the new piece
                            if ((((nextIndex - 1) + bufferlength) - lengthSoFar) > maxLineLength) {
                                string message;
                                stringprintf (message, "Input line too long (> %d characters)", maxLineLength);
                                error ("", message, LIMIT_FATAL, 144);
                            }

                            lstringcpy (&(lstringchar (scanner_inputline, nextIndex + 1)), buffer);

                            nextIndex += bufferlength;

                            // A short piece means we have reached the end of the line
                            if (bufferlength != maxStringLength) break;
                        }

                        lengthSoFar = nextIndex;

                    } else {
                        // Got a complete line, concatenated on the end of our current buffer
                        lengthSoFar += bufferlength;
                    }

                    // End every line in the buffer with a newline
                    lstringcpy (&(lstringchar (scanner_inputline, lengthSoFar + 1)), "\n");
                    lengthSoFar += 1;

                    if (tfeof (scanner_inputStream)) break;
                }
            }

        } else { // not options.option (multiline), or txlSource

            if (tfeof (scanner_inputStream)) {
                // End of input
                lstringchar (scanner_inputline, 1) = EOF_;
                lstringchar (scanner_inputline, 2) = EOS;

            } else {
                // Single line free-form input
                // The new line replaces the previous content of the buffer
                tfgetstring (scanner_inputline, scanner_inputStream);

                if (stringlen (scanner_inputline) == maxStringLength) {
                    // The single line is longer than our max string length, so continue reading it

                    if (!scanner_txlSource) {
                        // Here nextIndex is the index at which the next piece is to be stored
                        int nextIndex = maxStringLength + 1;
                        while (true) {
                            string      buffer;
                            tfgetstring (buffer, scanner_inputStream);
                            const int bufferlength = stringlen (buffer);

                            if ((nextIndex - 1) + bufferlength > maxLineLength) {
                                string message;
                                stringprintf (message, "Input line too long (> %d characters)", maxLineLength);
                                error ("", message, LIMIT_FATAL, 144);
                            }

                            lstringcpy (&(lstringchar (scanner_inputline, nextIndex)), buffer);

                            // A short piece means we have reached the end of the line
                            if (bufferlength != maxStringLength) break;

                            nextIndex += maxStringLength;
                        }

                    } else {
                        // TXL source programs are limited to max string length lines
                        string message;
                        stringprintf (message, "TXL program line too long (> %d characters)", maxStringLength - 1);
                        error ("", message, LIMIT_FATAL, 145);
                    }
                }

                lstringcat (scanner_inputline, "\n");
            }
        }

    } else { // not file input

        // String text to scan, from the [parse] predefined function

        // The source text is overwritten with a lone EOF_ once it has been delivered,
        // so a source text that is exactly EOF_ means there is nothing left to scan
        if ((lstringchar (scanner_sourceText, 1) != EOF_) || (lstringchar (scanner_sourceText, 2) != EOS)) {
            lstringcpy (scanner_inputline, scanner_sourceText);
            lstringcat (scanner_inputline, "\n");    // always add newline in the new regimen
            // mark the end of the string as end of input
            lstringchar (scanner_sourceText, 1) = EOF_;
            lstringchar (scanner_sourceText, 2) = EOS;

        } else {
            // Make sure we only give EOF_ if we've already processed the text
            lstringchar (scanner_inputline, 1) = EOF_;
            lstringchar (scanner_inputline, 2) = EOS;
        }
    }

    // Begin at the first character in the buffer
    scanner_inputchar = 1;
}

// TXL language include file facility
// Maintain a stack of currently open files, reading input from the top file

// Suspend the current source file and begin reading the file named by a TXL include statement.
// The file name is taken from the remainder of the current input line, with its quotes removed.
// The file is looked for first relative to the main source file's directory, then in each of the
// TXL include libraries in order. The reading position of the current file is saved on the include
// stack, the new file is added to fileNames, and its first buffer of input is loaded.
// Too many files, too deep nesting and a file that cannot be found are all fatal errors.
static void scanner_PushInclude (void) {
    // The rest of the current line holds the include file name
    string newFileName;
    stringcpy (newFileName, &(lstringchar (scanner_inputline, scanner_inputchar)));

    // Discard everything up to and including the opening quote, if there is one ...
    const int openQuote = stringindex (newFileName, "\"");
    if (openQuote != 0) {
        substring (newFileName, newFileName, openQuote + 1, stringlen (newFileName));
    }

    // ... and everything from the closing quote onward
    const int closeQuote = stringindex (newFileName, "\"");
    if (closeQuote != 0) {
        substring (newFileName, newFileName, 1, closeQuote - 1);
    }

    // Keep the name as written, and try it first in the directory of the main source file
    string writtenFileName;
    stringcpy (writtenFileName, newFileName);
    stringcpy (newFileName, scanner_sourceFileDirectory);
    stringcat (newFileName, writtenFileName);

    // Is there room for another source file?
    if (nFiles == maxFiles) {
        string message;
        stringprintf (message, "Too many source include files (> %d)", maxFiles);
        error ("", message, LIMIT_FATAL, 149);
    }

    int newInputStream;
    tfopen (OPEN_CHAR_READ, newFileName, &newInputStream);

    // Not there? Then try each of the include libraries in turn until it opens
    for (int lib = 1; (newInputStream == 0) && (lib <= options_nTxlIncludeLibs); lib++) {
        stringcpy (newFileName, options_txlIncludeLibs[lib]);
        stringcat (newFileName, directoryChar);
        stringcat (newFileName, writtenFileName);
        tfopen (OPEN_CHAR_READ, newFileName, &newInputStream);
    }

    if (newInputStream == 0) {
        string message;
        stringprintf (message, "Unable to find include file '%s'", writtenFileName);
        error ("", message, FATAL, 150);
    }

    // Is there room on the include stack?
    if (scanner_includeDepth == maxIncludeDepth) {
        string message;
        stringprintf (message, "Include file nesting too deep (> %d)", maxIncludeDepth);
        error ("", message, LIMIT_FATAL, 151);
    }

    // Save where we are in the including file
    scanner_includeDepth += 1;

    struct scanner_includeStackEntry *saved = &(scanner_includeStack[scanner_includeDepth]);
    saved->linenum = scanner_linenum;
    saved->filenum = scanner_filenum;
    saved->file = scanner_inputStream;

    // Register the included file as the newest source file, and make it current
    nFiles += 1;
    stringcpy (fileNames[nFiles], newFileName);

    scanner_inputStream = newInputStream;
    scanner_filenum = nFiles;

    // Begin reading at the top of the included file
    scanner_linenum = 0;
    scanner_getInputLine ();
}

static void scanner_PopInclude (void) {
    // Revert to the previous source file after end of file on the included file
    assert (scanner_includeDepth > 0);
    tfclose (scanner_inputStream);

    // Continue where we left off in the previous file (i.e., following the TXL include statement)
    struct scanner_includeStackEntry *is = &(scanner_includeStack[scanner_includeDepth]);
    scanner_inputStream = is->file;
    scanner_filenum = is->filenum;
    scanner_linenum = is->linenum;

    scanner_includeDepth -= 1;

    lstringcpy (scanner_inputline, "\n");
    scanner_inputchar = 1;
}

// We need scanToken when opening a file, in case the first line is an object language comment
// See detailed explanation below
// Forward declaration only - scanner_openFile calls it with a pattern's code array, start position 1,
// the pattern's length as the end position, and test = true, and uses the result as "the pattern matched"
static bool scanner_scanToken (const scanner_patternT pattern, const int startpos, const int endpos, const bool test);

// Open a main TXL or object language input source, and load its first buffer of input text.
//   fileNameOrText - when scanner_fileInput is set, the name of the file to open, where "", "stdin"
//                    and "STDIN" all mean standard input; otherwise the text itself, to be scanned
//                    for the [parse] predefined function
// The source becomes file number 1, and scanning begins on line 1. A file that cannot be opened
// is a fatal error. For file input the directory part of the file name, if it has one, is remembered
// in scanner_sourceFileDirectory.
static void scanner_openFile (const string fileNameOrText)
{
    // The main source is always the first and only file to begin with
    nFiles = 1;

    if (!scanner_fileInput) {
        // Input from a string of text to be scanned and parsed using [parse]
        stringcpy (fileNames[1], "(no file)");
        stringcpy (scanner_sourceText, fileNameOrText);

    } else {
        // Input from a TXL or object language source file
        stringcpy (fileNames[1], fileNameOrText);

        // Standard input is already open, anything else we must open ourselves
        const bool useStandardInput = (stringcmp (fileNameOrText, "") == 0)
            || (stringcmp (fileNameOrText, "stdin") == 0)
            || (stringcmp (fileNameOrText, "STDIN") == 0);

        if (useStandardInput) {
            scanner_inputStream = tfstdin;
        } else {
            tfopen (OPEN_CHAR_READ, fileNameOrText, &scanner_inputStream);
        }

        if (scanner_inputStream == 0) {
            string message;
            stringprintf (message, "Unable to open source file '%s'", fileNameOrText);
            error ("", message, FATAL, 152);
        }

        // Keep the directory part of the file name, as the context for TXL include files
        const bool hasDirectory = (stringindex (fileNameOrText, "/") != 0) || (stringindex (fileNameOrText, "\\") != 0);

        if (hasDirectory) {
            stringcpy (scanner_sourceFileDirectory, fileNameOrText);

            // Trim characters off the end until the last one is a directory separator
            for (;;) {
                const char lastChar = scanner_sourceFileDirectory[stringlen (scanner_sourceFileDirectory) - 1];
                if ((lastChar == '/') || (lastChar == '\\')) break;
                substring (scanner_sourceFileDirectory, scanner_sourceFileDirectory, 1, stringlen (scanner_sourceFileDirectory) - 1);
            }
        }
    }

    // Very special case, for object languages with first column marker comments of the form
    //    * this is a comment
    // for example in Snobol, when specified using a token pattern of the form
    //    comment  "\n\*#n*"
    // If such newline comments are possible, we put an implicit newline at the beginning of the file

    const bool multilineObjectInput = (options_option[multiline_p]) && (!scanner_txlSource);
    const bool implicitNewline = multilineObjectInput
        && (options_option[nlcomments_p]) && (options_option[newline_p]) && (!(options_option[charinput_p]));

    if (multilineObjectInput) {
        // Multiline input is appended to the buffer, so it must start out well defined
        if (implicitNewline) {
            lstringcpy (scanner_inputline, "\n");
        } else {
            lstringchar (scanner_inputline, 1) = EOS;
        }
    }

    // Load the first buffer of input text, and begin scanning on line 1
    scanner_filenum = 1;
    scanner_linenum = 0;

    scanner_getInputLine ();

    scanner_linenum = 1;

    if (!implicitNewline) {
        return;
    }

    // The implicit newline is wanted only if the input really does begin with a newline comment,
    // so try each comment pattern that can begin with a newline against the start of the buffer
    int linkIndex = scanner_patternIndex['\n'];
    assert (linkIndex != 0);

    bool leadingNLcomment = false;
    do {
        const struct scanner_patternEntryT *entry = &(scanner_tokenPatterns[scanner_patternLink[linkIndex]]);
        leadingNLcomment = (entry->kind == treeKind_comment) && (scanner_scanToken (entry->pattern, 1, entry->length, true));
        linkIndex += 1;
    } while ((!leadingNLcomment) && (scanner_patternLink[linkIndex] != 0));

    // With a leading newline comment we keep the implicit newline, which is then on line 0;
    // otherwise we skip over it and begin on line 1
    scanner_linenum = leadingNLcomment ? 0 : 1;
    scanner_inputchar = leadingNLcomment ? 1 : 2;
}

// Close the main input file.
// Nothing is done for string input, nor for standard input, which is left open.
static void scanner_closeFile (void) {
    if ((!scanner_fileInput) || (scanner_inputStream == tfstdin)) {
        return;
    }

    tfclose (scanner_inputStream);
    scanner_inputStream = 0;
}

// TXL Preprocessor module
// Conditional compilation handling for TXL, providing #define, #ifdef, #else, #endif
// See handlePreprocessorDirective below for details

// Stack of currently nested #ifdefs - 1-origin [1 .. maxIfdefDepth]
// Parallel arrays, both filled in by scanner_pushIfdef:
//   scanner_ifdefStack[i] - whether the condition of the i'th open #if held when it was pushed
//   scanner_ifdefFile[i]  - the file number (scanner_filenum) that was current when it was pushed
static bool scanner_ifdefStack[maxIfdefDepth + 1];
static int scanner_ifdefFile[maxIfdefDepth + 1];
static int scanner_ifdefTop;  // 0 - number of open #ifs, 0 when there are none

// Check that no #if opened in the current file is still open.
// If the innermost open #if was pushed while reading the current file, a missing #endif is
// reported as a fatal error, and the #if stack is emptied.
static void scanner_synchronizePreprocessor (void) {
    // No open #if at all
    if (scanner_ifdefTop <= 0) {
        return;
    }

    // The innermost open #if belongs to some other file
    if (scanner_ifdefFile[scanner_ifdefTop] != scanner_filenum) {
        return;
    }

    string context;
    stringprintf (context, "line %d of %s", scanner_linenum + 1, fileNames[scanner_filenum]);
    error (context, "Preprocessor syntax error: missing #endif directive", FATAL, 153);
    scanner_ifdefTop = 0;
}

// Open a new nested #if.
//   symbol  - the preprocessor symbol being tested
//   negated - true when the test is for the symbol NOT being defined
// The symbol is looked up with options_lookupIfdefSymbol, where a result of 0 is treated as
// "not defined". The outcome of the test is pushed on the #if stack, together with the current
// file number. More than maxIfdefDepth nested #ifs is a fatal limit error.
static void scanner_pushIfdef (const string symbol, const bool negated)
{
    const int symbolIndex = options_lookupIfdefSymbol (symbol);

    if (scanner_ifdefTop >= maxIfdefDepth) {
        string message;
        stringprintf (message, "#ifdef nesting too deep (> %d levels deep)", maxIfdefDepth);
        error ("", message, LIMIT_FATAL, 155);
        return;
    }

    scanner_ifdefTop += 1;
    scanner_ifdefFile[scanner_ifdefTop] = scanner_filenum;
    scanner_ifdefStack[scanner_ifdefTop] = negated ? (symbolIndex == 0) : (symbolIndex != 0);
}

// Close the innermost open #if.
// It is a fatal error if there is no #if open.
static void scanner_popIfdef (void) {
    if (scanner_ifdefTop <= 0) {
        string context;
        stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
        error (context, "Preprocessor syntax error: too many #endif directives (no matching #if)", FATAL, 156);
        return;
    }

    scanner_ifdefTop--;
}

// Report the recorded outcome of the innermost open #if.
// There must be at least one #if open.
static bool scanner_trueIfdef (void) {
    assert (scanner_ifdefTop > 0);

    const bool innermost = scanner_ifdefStack[scanner_ifdefTop];
    return (innermost);
}

// Selectors for scanner_flushLinesUntilPreprocessorDirective, saying which directive ends the flush:
// the next #elsif, #elif, #else or #end at the current nesting level, or only the next #end
#define pMatchingElsifElseOrEndif 1
#define pMatchingEndif 2

// Skip input lines up to the preprocessor directive that ends the current conditional section.
//   whichDirective - pMatchingEndif to stop only at a directive beginning with "end", or
//                    pMatchingElsifElseOrEndif to stop also at one beginning with "elsif", "elif" or "else"
// Lines are read and counted one at a time. Any directive beginning with "if" that is met on the way
// opens a nested section, which is skipped in its entirety by a recursive call.
// On return the stopping directive's line is in the input buffer, with scanner_inputchar at the
// beginning of the line. Reaching end of input first is a fatal error.
static void scanner_flushLinesUntilPreprocessorDirective (const int whichDirective)
{
    for (;;) {
        scanner_getInputLine ();
        scanner_linenum++;

        if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) break;

        // A line with no # in it cannot be a preprocessor line
        if (stringindex (&(lstringchar (scanner_inputline, scanner_inputchar)), "#") == 0) continue;

        const int lineStart = scanner_inputchar;

        // Skip blanks
        while (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
            scanner_inputchar += 1;
        }

        // It is a preprocessor line only if the # comes first
        if (lstringchar (scanner_inputline, scanner_inputchar) != '#') continue;

        scanner_inputchar += 1;

        // Skip blanks
        while (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
            scanner_inputchar += 1;
        }

        // See if this is the one
        bool found = stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "end", 3) == 0;

        if ((!found) && (whichDirective == pMatchingElsifElseOrEndif)) {
            found = (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "elsif", 5) == 0)
                || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "elif", 4) == 0)
                || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "else", 4) == 0);
        }

        if (found) {
            // Leave the directive line to be processed from its beginning
            scanner_inputchar = lineStart;
            break;
        }

        // A nested #if - skip all of it, up to and including its own #end
        if (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "if", 2) == 0) {
            scanner_flushLinesUntilPreprocessorDirective (pMatchingEndif);
        }
    }

    // Running out of input means the directive we were looking for is missing
    if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) {
        string context;
        stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
        error (context, "Preprocessor syntax error: missing #endif directive", FATAL, 157);
    }
}

// Forward declaration - the definition is not in this part of the file.
// Declared here so that it is in scope for the preprocessor directive handler that follows
// (presumably called from it - TODO confirm)
static void scanner_sortTokenPatterns (void);

static void scanner_handlePreprocessorDirective (void) {
    assert (lstringchar (scanner_inputline, scanner_inputchar) == '#');

    // TXL preprocessor directives
    //
    //  #pragma -arg ...                                        Set command line arguments
    //
    //  #def[ine] SYM                                           Define symbol
    //  #undef[ine] SYM                                         Undefine symbol
    //
    //  #if[n][def] SYM { [[and|or] SYM } [then]                If symbol defined
    //  #els[e]if[n][def] SYM { [[and|or] SYM } [then]          Elsif symbol defined
    //  #else
    //  #end[if]
    //
    //  #! ...                                                  Unix kernel directive

    // Skip #
    scanner_inputchar += 1;

    // Skip blanks
    while (true) {
        if (!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
        scanner_inputchar += 1;
    }

    // Which directive do we have?
    if ((stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "def", 3) == 0) 
            || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "undef", 5) == 0)) {

        // #def[ine] SYM
        // #undef[ine] SYM
        const bool define = lstringchar (scanner_inputline, scanner_inputchar) == 'd';
        while (true) {
            if ((lstringchar (scanner_inputline, scanner_inputchar) == EOS) 
                || (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }

        // Skip blanks
        while (true) {
            if (!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }

        // Get symbol
        const int startchar = scanner_inputchar;
        while (true) {
            if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }
        string symbol;
        lsubstring (symbol, scanner_inputline, startchar, (scanner_inputchar - 1));
        if (stringcmp (symbol, "") == 0) {
            string context;
            stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
            error (context, "Preprocessor syntax error: missing symbol in #define or #undefine directive", FATAL, 158);
        }

        // Define or undefine it
        if (define) {
            options_setIfdefSymbol (symbol);
        } else {
            options_unsetIfdefSymbol (symbol);
        }

        // Discard line
        scanner_getInputLine ();
        scanner_linenum += 1;

    } else if ((((stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "if", 2) == 0) 
            || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "elsif", 5) == 0)) 
            || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "elif", 4) == 0)) 
            || (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "elseif", 6) == 0)) {

        // #el[s][e]if[n][def] [not] SYM [then] 
        // #if[n][def] [not] SYM [then]
        const bool firstif = lstringchar (scanner_inputline, scanner_inputchar) == 'i';
        const int ifindex = stringindex (&(lstringchar (scanner_inputline, scanner_inputchar)), "if");
        bool negated = lstringchar (scanner_inputline, scanner_inputchar - 1 + ifindex + 2) == 'n';
        while (true) {
            if ((lstringchar (scanner_inputline, scanner_inputchar) == EOS) 
                || (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }

        // Skip blanks
        while (true) {
            if (!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }

        // Get symbol
        int startchar = scanner_inputchar;
        while (true) {
            if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
            scanner_inputchar += 1;
        }
        string symbol;
        lsubstring (symbol, scanner_inputline, startchar, (scanner_inputchar - 1));
        if (stringcmp (symbol, "") == 0) {
            string context;
            stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
            error (context, "Preprocessor syntax error: missing symbol in #if or #elsif directive", FATAL, 159);
        }

        // Check for 'if not SYM'
        if (stringcmp (symbol, "not") == 0) {
            negated = !negated;

            // Skip blanks
            while (true) {
                if (!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                scanner_inputchar += 1;
            }

            // Get symbol
            startchar = scanner_inputchar;
            while (true) {
                if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                scanner_inputchar += 1;
            }
            lsubstring (symbol, scanner_inputline, startchar, (scanner_inputchar - 1));
            if (stringcmp (symbol, "") == 0) {
                string context;
                stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
                error (context, "Preprocessor syntax error: missing symbol in #if or #elsif directive", FATAL, 159);
            }
        }

        // Test it
        if (firstif) {
            scanner_pushIfdef (symbol, negated);
            if (!scanner_trueIfdef ()) {
                // Trash the true part
                scanner_flushLinesUntilPreprocessorDirective (pMatchingElsifElseOrEndif);
            } else {
                // Discard line
                scanner_getInputLine ();
                scanner_linenum += 1;
            }

        } else {
            if (scanner_ifdefTop == 0) {
                string context;
                stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
                error (context, "Preprocessor syntax error: #else or #elsif not nested inside #if", FATAL, 161);
            }
            if (scanner_trueIfdef ()) {
                // Trash the false part
                scanner_flushLinesUntilPreprocessorDirective (pMatchingEndif);
            } else {
                // The previous alternatives were false; try this one
                scanner_popIfdef ();
                scanner_pushIfdef (symbol, negated);
                if (!scanner_trueIfdef ()) {
                    // Trash the true part
                    scanner_flushLinesUntilPreprocessorDirective (pMatchingElsifElseOrEndif);
                } else {
                    // Discard line
                    scanner_getInputLine ();
                    scanner_linenum += 1;
                }
            }
        }

    } else if (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "else", 4) == 0) {
        // #else
        if (scanner_ifdefTop == 0) {
            string context;
            stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
            error (context, "Preprocessor syntax error: #else or #elsif not nested inside #if", FATAL, 161);
        }
        if (scanner_trueIfdef ()) {
            // Trash the false part
            scanner_flushLinesUntilPreprocessorDirective (pMatchingEndif);
        } else {
            // Discard line
            scanner_getInputLine ();
            scanner_linenum += 1;
        }

    } else if (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "end", 3) == 0) {
        // #end[if]
        scanner_popIfdef ();
        scanner_getInputLine ();
        scanner_linenum += 1;

    } else if (stringncmp (&(lstringchar (scanner_inputline, scanner_inputchar)), "pragma", 6) == 0) {
        // #pragma -ARG ...
        options_processOptionsString (&(lstringchar (scanner_inputline, scanner_inputchar)));

        // Discard line
        scanner_getInputLine ();
        scanner_linenum += 1;

        // If character maps were updated, reset scanner links - JRC 10.4d
        if (options_updatedChars) {
            scanner_sortTokenPatterns ();
        }

    } else if (lstringchar (scanner_inputline, scanner_inputchar) == '!') {
        // #! ... Unix kernel directivee - just discard the line
        scanner_getInputLine ();
        scanner_linenum += 1;

    } else {
        string context, message;
        stringprintf (context, "line %d of %s", (scanner_linenum + 1), fileNames[scanner_filenum]);
        stringprintf (message, "Preprocessor directive syntax error at or near:\n    %s", &(lstringchar (scanner_inputline, scanner_inputchar)));
        error (context, message, FATAL, 163);
    }
}

// Skip the TXL comment that begins at the current input position, which must be a '%'.
//
// A comment opening with %( or %{ runs to the matching )% or }% terminator, possibly
// several lines later; scanning then resumes with the text following the terminator.
// Reaching end of file before the terminator is a fatal error.
// Any other % comment runs to the end of the line, and the whole line is discarded.
// scanner_linenum is incremented once for every input line discarded.

static void scanner_skipTxlComment (void) {
    assert (lstringchar (scanner_inputline, scanner_inputchar) == '%');

    // Multiline comment %( )% or %{ }% - JRC 22.9.07
    if ((lstringchar (scanner_inputline, scanner_inputchar + 1) == '(') || (lstringchar (scanner_inputline, scanner_inputchar + 1) == '{')) {
        // The terminator must match the kind of opener
        string  comend;
        stringcpy (comend, ")%");
        if (lstringchar (scanner_inputline, scanner_inputchar + 1) == '{') {
            stringcpy (comend, "}%");
        }

        // Look for the terminator, discarding lines until we find one that contains it.
        // comindex is the 1-origin position of the terminator counted from the current
        // input position (not from the beginning of the line), or 0 if it is not there.
        int comindex = 0;
        while (true) {
            comindex = stringindex (&(lstringchar (scanner_inputline, scanner_inputchar)), comend);
            if (comindex != 0) break;

            scanner_getInputLine ();
            scanner_linenum += 1;

            if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) break;
        }

        if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) {
            string context;
            stringcpy (context, "at end of "), stringcat (context, fileNames[scanner_filenum]);
            error (context, "Syntax error - comment ends at end of file", FATAL, 164);
        }

        // Keep the rest of the line after the )% or }%.
        // Since comindex is relative to the current input position, it must be converted
        // to a position in the line - otherwise a comment that begins and ends on the same
        // line, but does not begin in column 1, leaves us back inside (or before) the comment.
        scanner_inputchar = (scanner_inputchar - 1) + comindex + 2;

    } else {
        // Single line comment - discard line
        scanner_getInputLine ();
        scanner_linenum += 1;
    }
}

// Advance the input position to the beginning of the next token, fetching more input as needed.
//
// For multiline object language input, blanks are skipped (unless in char input mode) and
// the input buffer is refilled when the position nears its end or the buffered text runs out.
// Otherwise, blanks are skipped a line at a time, and in TXL source, comments and
// preprocessor directives are consumed as well.
// scanner_linenum is incremented for every newline character skipped.

static void scanner_skipSeparators (void) {
    if ((options_option[multiline_p]) && (!scanner_txlSource)) {

        for (;;) {
            // Unless in char input mode, step over blanks, counting the newlines we pass
            if (!(options_option[charinput_p])) {
                while (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
                    if (lstringchar (scanner_inputline, scanner_inputchar) == '\n') {
                        scanner_linenum += 1;
                    }
                    scanner_inputchar += 1;
                }
            }

            // Make sure that there is always at least one full maxLineLength lookahead when scanning the next token
            assert (inputBufferFactor >= 2);
            assert (maxLongStringLength >= maxStringLength * 2);

            // Decide whether more input is needed, and prepare the buffer to receive it
            if (scanner_fileInput && (scanner_inputchar > (maxLineLength * (inputBufferFactor - 1)))) {
                // Too close to the end of the buffer - slide the unscanned text down to the front.
                // Watch out, this copies the buffer on top of itself!
                // NOTE(review): confirm that lstringcpy tolerates overlapping source and destination
                lstringcpy (scanner_inputline, &(lstringchar (scanner_inputline, scanner_inputchar)));

            } else if (lstringchar (scanner_inputline, scanner_inputchar) == EOS) {
                // Everything buffered has been used - start over with an empty buffer
                lstringchar (scanner_inputline, 1) = EOS;

            } else {
                // We are at the next token with enough lookahead
                return;
            }

            scanner_getInputLine ();

            // Go around again only if the new text begins with blanks that we are to skip
            if ((!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) || (options_option[charinput_p])) {
                return;
            }
        }

    } else {
        assert (scanner_txlSource || (! options_option[multiline_p]));

        for (;;) {
            // Remember whether we began at the first character of the line, before skipping any blanks
            const bool atLineStart = (scanner_inputchar == 1);

            // Step over blanks, counting the newlines we pass (always for TXL source, otherwise not in char input mode)
            if (scanner_txlSource || (!(options_option[charinput_p]))) {
                while (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
                    if (lstringchar (scanner_inputline, scanner_inputchar) == '\n') {
                        scanner_linenum += 1;
                    }
                    scanner_inputchar += 1;
                }
            }

            // At the end of the line, get the next one and keep skipping, unless the input is finished
            if (lstringchar (scanner_inputline, scanner_inputchar) == EOS) {
                scanner_getInputLine ();
                if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) {
                    return;
                }
                continue;
            }

            // In object language input, anything that is not a blank begins the next token
            if (!scanner_txlSource) {
                return;
            }

            // In TXL source, comments are separators too
            if (lstringchar (scanner_inputline, scanner_inputchar) == '%') {
                scanner_skipTxlComment ();
                continue;
            }

            // ... and so are preprocessor directives, which are recognized only on lines
            // where we started from the first character
            if (atLineStart && (lstringchar (scanner_inputline, scanner_inputchar) == '#')) {
                scanner_handlePreprocessorDirective ();
                continue;
            }

            return;
        }
    }
}

static bool scanner_scanToken (const scanner_patternT pattern, const int startpos, const int endpos, const bool test)
{
    // Walk through pattern
    int pos = startpos;
    const int startchar = scanner_inputchar;
    const int startlinenum = scanner_linenum;

    while (true) {
        scanner_patternCodeT pat = pattern[pos];
        bool fail = true;

        switch (pat) {

            // Character classes

            case DIGIT:
                {
                    pos += 1;
                    const bool repeated = charset_repeaterP[((unsigned char) pattern[pos])];
                    while (true) {
                        if (!(charset_digitP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            case ALPHA:
                {
                    pos += 1;
                    const bool repeated = charset_repeaterP[((unsigned char) pattern[pos])];
                    while (true) {
                        if (!(charset_alphaP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            case ALPHAID:
                {
                    pos += 1;
                    const bool repeated = charset_repeaterP[((unsigned char) pattern[pos])];
                    while (true) {
                        if (!(charset_alphaidP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            case ID:
                {
                    pos += 1;
                    const bool repeated = charset_repeaterP[((unsigned char) pattern[pos])];
                    while (true) {
                        if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case UPPER:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (!(charset_upperP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case UPPERID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (!(charset_upperidP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case LOWER:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (!(charset_lowerP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case LOWERID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (!(charset_loweridP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case SPECIAL:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (!(charset_specialP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case ANY:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOS) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NEWLINE:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) != '\n') break;
                        scanner_inputchar += 1;
                        scanner_linenum += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case RETURN:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) != '\r') break;
                        scanner_inputchar += 1;
                        // Handle CR and CR-LF line endings
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) != '\n') {
                            scanner_linenum += 1;
                        }
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case TAB:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) != '\t') break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            // Inverted character classes

            case NOTDIGIT:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_digitP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTALPHA:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_alphaP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTALPHAID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_alphaidP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTUPPER:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_upperP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTUPPERID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_upperidP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTLOWER:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_lowerP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTLOWERID:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_loweridP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTSPECIAL:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((charset_specialP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTNEWLINE:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar) == '\n') 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTRETURN:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar) == '\r') 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case NOTTAB:
                {
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar) == '\t') 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        };
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    };
                }
                break;

            case CHOICE:
                {
                    // Choice - a set of alternative subpatterns
                    pos += 1;
                    const int len = pattern[pos];

                    // Calculate range of alternative subpatterns in the pattern
                    const int altsubsstartpos = pos + 1;
                    const int altsubsendpos = pos + len;

                    // Skip choice
                    pos += len + 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];

                    while (true) {
                        // Try each alternative supattern
                        int substartpos = altsubsstartpos;
                        bool subfail = true;
                        const int startinputchar = scanner_inputchar;

                        while (true) {
                            if (substartpos > altsubsendpos) break;

                            // Isolate the alternative subpattern
                            const int spat = pattern[substartpos];
                            int subendpos = substartpos;

                            // If the alternative is non-trivial, find the end of it
                            switch (spat) {
                                case CHOICE: case NOTCHOICE: case SEQUENCE: case NOTSEQUENCE:
                                    {
                                        subendpos = (substartpos + (pattern[(substartpos + 1)])) + 2;
                                    }
                                    break;
                                case ESCAPE: case NOT:
                                    {
                                        subendpos += 1;
                                    }
                                    break;
                                default :
                                    break;
                            }

                            // If it has a repeat indicator, include that
                            if (charset_repeaterP[((unsigned char) (pattern[(subendpos + 1)]))]) {
                                subendpos += 1;
                            }

                            // Now see if we can scan one of those
                            if (scanner_scanToken (pattern, substartpos, subendpos, test)) {
                                subfail = false;
                                break;
                            }

                            // Otherwise move on to the next alternative
                            substartpos = subendpos + 1;
                        }

                        // If all alternatives failed, no sense going on
                        if (subfail) break;

                        // Otherwise we found at least one
                        fail = false;

                        if (!repeated) break;

                        // Don't repeat a null forever!
                        if (scanner_inputchar == startinputchar) break;
                    }
                }
                break;

            case NOTCHOICE:  // #[...]
                {
                    // Negated choice - a set of alternative subpatterns,
                    // all of which must fail in order to accept a single character
                    pos += 1;
                    const int len = pattern[pos];

                    // Calculate range of alternative subpatterns in the pattern
                    const int altsubsstartpos = pos + 1;
                    const int altsubsendpos = pos + len;

                    // Skip choice
                    pos += len + 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];

                    while (true) {
                        // Try each alternative subpattern - if any succeeds, we are done
                        int substartpos = altsubsstartpos;
                        bool subfail = true;
                        const int startinputchar = scanner_inputchar;
                        const int substartlinenum = scanner_linenum;

                        while (true) {
                            if (substartpos > altsubsendpos) break;

                            // Isolate the alternative subpattern
                            const int spat = pattern[substartpos];
                            int subendpos = substartpos;

                            // If the alternative is non-trivial, find the end of it
                            switch (spat) {
                                case CHOICE: case NOTCHOICE: case SEQUENCE: case NOTSEQUENCE:
                                    {
                                        subendpos = (substartpos + (pattern[substartpos + 1])) + 2;
                                    }
                                    break;
                                case ESCAPE: case NOT:
                                    {
                                        subendpos += 1;
                                    }
                                    break;
                                default :
                                    break;
                            }

                            // If it has a repeat indicator, include that
                            if (charset_repeaterP[((unsigned char) (pattern[subendpos + 1]))]) {
                                subendpos += 1;
                            }

                            // Now see if we can scan one of those
                            if (scanner_scanToken (pattern, substartpos, subendpos, test)) {
                                subfail = false;
                                break;
                            }

                            // Otherwise move on to the next alternative
                            substartpos = subendpos + 1;
                        }

                        // If any alternative succeeded, we are done
                        if (!subfail) {
                            scanner_inputchar = startinputchar;
                            scanner_linenum = substartlinenum;
                            break;
                        }

                        // Otherwise we found at least one - remember and accept it
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOS) break;

                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        }

                        scanner_inputchar += 1;

                        fail = false;

                        if (!repeated) break;
                    }
                }
                break;

            case SEQUENCE:
                {
                    // Sequence - a grouped subpattern
                    pos += 1;
                    const int len = pattern[pos];

                    // Calculate end of subpattern
                    const int substartpos = pos + 1;
                    const int subendpos = pos + len;

                    // Skip sequence
                    pos += len + 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];

                    // Now step through the sequence
                    while (true) {
                        // scanToken accepts a whole sequence anyway, so this is easy
                        if (!scanner_scanToken (pattern, substartpos, subendpos, test)) break;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            case NOTSEQUENCE:   // #(...)
                {
                    // Negated sequence - a sequence that must fail in order to accept a single character
                    pos += 1;
                    const int len = pattern[pos];

                    // Calculate end of subpattern
                    const int substartpos = pos + 1;
                    const int subendpos = pos + len;

                    // Skip sequence
                    pos += len + 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];

                    while (true) {
                        bool subfail = true;
                        const int startinputchar = scanner_inputchar;

                        // See if we can scan the sequence
                        if (scanner_scanToken (pattern, substartpos, subendpos, test)) {
                            subfail = false;
                        }

                        // If the sequence succeeded, we are done
                        if (!subfail) {
                            scanner_inputchar = startinputchar;
                            break;
                        }

                        // Otherwise we found at least one - remember and accept it
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOS) break;
                        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                            scanner_linenum += 1;
                        }

                        scanner_inputchar += 1;
                        fail = false;

                        if (!repeated) break;
                    }
                }
                break;

            case LOOKAHEAD:     // \:...
                {
                    // Lookahead - test end of pattern
                    const int substartpos = pos + 1;
                    const int subendpos = endpos;

                    pos = endpos + 1;   // skip lookahead pattern

                    bool subfail = true;
                    const int lookinputchar = scanner_inputchar;
                    const int looklinenum = scanner_linenum;

                    // See if we can scan the lookahead
                    if (scanner_scanToken (pattern, substartpos, subendpos, test)) {
                        subfail = false;
                    }

                    // Either way, we back up
                    scanner_inputchar = lookinputchar;
                    scanner_linenum = looklinenum;

                    if (!subfail) {
                        // The lookahead succeeded
                        goto scanexit;
                    } else {
                        // The lookahead failed
                    }
                }
                break;

            case NOTLOOKAHEAD:  // #\:...
                {
                    // Inverted lookahead - test not end of pattern
                    const int substartpos = pos + 1;
                    const int subendpos = endpos;

                    pos = endpos + 1;   // skip lookahead pattern

                    bool subfail = true;
                    const int lookinputchar = scanner_inputchar;
                    const int looklinenum = scanner_linenum;

                    // See if we can scan the lookahead
                    if (scanner_scanToken (pattern, substartpos, subendpos, test)) {
                        subfail = false;
                    }

                    // Either way, we back up
                    scanner_inputchar = lookinputchar;
                    scanner_linenum = looklinenum;

                    if (subfail) {
                        // The lookahead succeeded
                        goto scanexit;
                    } else {
                        // The lookahead failed
                    }
                }
                break;

            case ESCAPE:
                {
                    // Escaped meta-character
                    pat = pattern[pos + 1];
                    pos += 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (lstringchar (scanner_inputline, scanner_inputchar) != pat) break;
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            case NOT:
                {
                    // Inverted character
                    pat = pattern[pos + 1];
                    pos += 2;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if ((lstringchar (scanner_inputline, scanner_inputchar) == pat) 
                            || (lstringchar (scanner_inputline, scanner_inputchar) == EOS)) break;
                        if (lstringchar (scanner_inputline, scanner_inputchar) == '\n') {
                            scanner_linenum += 1;
                        }
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;

            default :
                {
                    // Literal character
                    pos +=1;
                    const bool repeated = charset_repeaterP[((unsigned char) (pattern[pos]))];
                    while (true) {
                        if (lstringchar (scanner_inputline, scanner_inputchar) != pat) break;
                        if (lstringchar (scanner_inputline, scanner_inputchar) == '\n') {
                            scanner_linenum += 1;
                        }
                        scanner_inputchar += 1;
                        fail = false;
                        if (!repeated) break;
                    }
                }
                break;
        }

        if (charset_optionalP[((unsigned char) (pattern[pos]))]) {
            // The pattern allows null matches - so ok no matter what!
            pos += 1;

        } else {
            // The pattern requires us to accept something
            if (pattern[pos] == '+') {
                pos += 1;
            }

            // If we failed to do that, back up so we can try another alternative
            if (fail) {
                if (!test) {
                    scanner_inputchar = startchar;
                }
                scanner_linenum = startlinenum;
                return (false);
            }
        }

        if (pos > endpos) break;
    }

    scanexit: return (true);
}

static bool scanner_scanCompoundLiteral (const int litindex)
{
    int i = litindex;
    char *input = &(lstringchar (scanner_inputline, scanner_inputchar));

    while (true) {
        const struct scanner_compoundT *lit = &(scanner_compoundTokens[i]);
        string temp;
        lsubstring (temp, input, 1, (lit->length));
        if (stringcmp (temp, lit->literal) == 0) {
            scanner_inputchar += lit->length;
            return (true);
        }

        i += 1;

        if ((i > scanner_nCompounds) || (stringchar (scanner_compoundTokens[i].literal, 1) != lstringchar (input, 1))) break;
    }

    return (false);
}

// Look up a comment start token in the comment bracket table.
// Returns the lowest 1-origin index whose scanner_commentStart entry equals
// commentstarttoken, or 0 when there is no such entry.
static int scanner_commentindex (const tokenT commentstarttoken)
{
    int slot = 1;

    while ((slot <= scanner_nComments) && (scanner_commentStart[slot] != commentstarttoken)) {
        slot += 1;
    }

    return ((slot <= scanner_nComments) ? slot : 0);
}

static void scanner_scanComment (const int startchararg, const int comindex)
{
    int startchar = startchararg;
    tokenT comtoken;

    string indent, comend;
    stringcpy (indent, "");
    stringcpy (comend, *ident_idents[scanner_commentEnd[comindex]]);

    if (scanner_commentEnd[comindex] == NOT_FOUND) {
        stringcpy (comend, "\n");
    }


    bool firstline = true;
    const int comstartlength = lstringlen (*ident_idents[scanner_commentStart[comindex]]);

    while (true) {
        if (startchar > (maxLineLength * (inputBufferFactor - 1))) {
            // Watch out, shifting on top of itself!
            lstringcpy (scanner_inputline, &(lstringchar (scanner_inputline, startchar)));
            scanner_getInputLine ();
            startchar = scanner_inputchar;

        } else if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOS) {
            lstringchar (scanner_inputline, 1) = EOS;
            scanner_getInputLine ();
            startchar = scanner_inputchar;
        }

        int comendindex = lstringindex (&(lstringchar (scanner_inputline, startchar)), comend);

        if ((firstline && (comendindex != 0)) && (comendindex <= comstartlength)) {
            comendindex = lstringindex (&(lstringchar (scanner_inputline, startchar + comstartlength)), comend);
            if (comendindex != 0) {
                comendindex += comstartlength;
            }
        }

        const int newlineindex = lstringindex (&(lstringchar (scanner_inputline, startchar)), "\n");

        if ((lstringchar (scanner_inputline, startchar) != EOF_) && (newlineindex != 0) 
                && ((comendindex == 0) || (newlineindex < comendindex))) {

            // We're continuing
            if (options_option[comment_token_p]) {
                // Include newline in internal commment lines 
                const char savedchar = lstringchar (scanner_inputline, startchar + newlineindex);
                // Cheat by truncating manually 
                lstringchar (scanner_inputline, startchar + newlineindex) = EOS;

                if (lstringlen (&lstringchar (scanner_inputline, startchar)) > (maxStringLength - stringlen (indent))) {
                    comtoken = ident_install (&lstringchar (scanner_inputline, startchar), treeKind_comment);
                } else {
                    // Safe to concatenate
                    string indentedComment;
                    stringcpy (indentedComment, indent), stringcat (indentedComment, &lstringchar (scanner_inputline, startchar));
                    comtoken = ident_install (indentedComment, treeKind_comment);
                }
                scanner_installToken (treeKind_comment, comtoken, comtoken);
                lstringchar (scanner_inputline, startchar + newlineindex) = savedchar;
            }

            scanner_linenum += 1;
            scanner_inputchar = startchar + newlineindex;

            if (scanner_commentEnd[comindex] == NOT_FOUND) break;

            if ((options_option[comment_token_p]) 
                    && (!(options_option[charinput_p]))) {      // don't trim or indent raw comments!
                while (true) {
                    if ((!(charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) 
                        || (lstringchar (scanner_inputline, scanner_inputchar) == '\n')) break;
                    scanner_inputchar += 1;
                }
                stringcpy (indent, "   ");
            }

        } else if (comendindex != 0) {



            // We're done
            if (options_option[comment_token_p]) {
                int lencomend = stringlen (comend);
                if (stringcmp (comend, "\n") == 0) {
                    scanner_linenum += 1;
                    lencomend = 0;
                }

                // Truncate in place
                const char savedchar = lstringchar (scanner_inputline, startchar + comendindex + lencomend - 1);
                lstringchar (scanner_inputline, startchar + comendindex + lencomend - 1) = EOS;

                if (lstringlen (&(lstringchar (scanner_inputline, startchar))) > (maxStringLength - lstringlen (indent))) {
                    comtoken = ident_install (&(lstringchar (scanner_inputline, startchar)), treeKind_comment);
                } else {
                    string indentedComment;
                    stringcpy (indentedComment, indent), stringcat (indentedComment, &lstringchar (scanner_inputline, startchar));
                    comtoken = ident_install (indentedComment, treeKind_comment);
                }

                scanner_installToken (treeKind_comment, comtoken, comtoken);
                lstringchar (scanner_inputline, startchar + comendindex + lencomend - 1) = savedchar;
            }

            scanner_inputchar = ((startchar + comendindex) + lstringlen (comend)) - 1;

            if (stringcmp (comend, "\n") == 0) {
                if (options_option[comment_token_p]) {
                    scanner_linenum -= 1;
                }
                scanner_inputchar -= 1;
            }

            break;

        } else { 
            // What the heck?
            assert (comendindex == 0);

            if (lstringchar (scanner_inputline, startchar) == EOF_) {
                string context;
                stringcpy (context, "at end of "), stringcat (context, fileNames[scanner_filenum]);
                error (context, "Syntax error - comment ends at end of file", FATAL, 164);
            } else {
                string message;
                stringprintf (message, "Input line too long (> %d characters)", maxLineLength);
                error ("", message, LIMIT_FATAL, 144);
            }
        }

        startchar = scanner_inputchar;
        firstline = false;
    }
}

static void scanner_sortCompoundTokens (void) {
    // Step 1. Bubblesort compoundTokens to ascending order by first character
    for (int k = scanner_nCompounds; k >= 2; k--) {
        bool swap = false;
        for (int j = 2; j <= k; j++) {
            if (stringchar (scanner_compoundTokens[j - 1].literal, 1) > stringchar (scanner_compoundTokens[j].literal, 1)) {
                struct scanner_compoundT temp;
                struct scanner_compoundT *left = &(scanner_compoundTokens[j - 1]);
                struct scanner_compoundT *right = &(scanner_compoundTokens[j]);
                temp.length = left->length; stringcpy (temp.literal, left->literal);
                left->length = right->length; stringcpy (left->literal, right->literal);
                right->length = temp.length; stringcpy (right->literal, temp.literal);
                swap = true;
            }
        }
        if (!swap) break;
    }

    // Step 2. Sort compoundTokens within first character in descending order of length
    while (true) {
        bool swap = false;
        for (int k = 1; k <= scanner_nCompounds - 1; k++) {
            if (stringchar (scanner_compoundTokens[k].literal, 1) == stringchar (scanner_compoundTokens[k + 1].literal, 1)) {
                if ((scanner_compoundTokens[k].length) < (scanner_compoundTokens[k + 1].length)) {
                    struct scanner_compoundT temp;
                    struct scanner_compoundT *left = &(scanner_compoundTokens[k]);
                    struct scanner_compoundT *right = &(scanner_compoundTokens[k + 1]);
                    temp.length = left->length; stringcpy (temp.literal, left->literal);
                    left->length = right->length; stringcpy (left->literal, right->literal);
                    right->length = temp.length; stringcpy (right->literal, temp.literal);
                    swap = true;
                }
            }
        }
        if (!swap) break;
    }

    // Mark end of compound tokens
    stringchar (scanner_compoundTokens[scanner_nCompounds + 1].literal, 1) = (char) 255;       // (sic)
    stringchar (scanner_compoundTokens[scanner_nCompounds + 1].literal, 2) = EOS;
    scanner_compoundTokens[scanner_nCompounds + 1].length = 1;

    // Step 3. Build the literal index table
    int k = 1;
    for (int c = 0; c <= 255; c++) {
        if ((k <= scanner_nCompounds) && (stringchar (scanner_compoundTokens[k].literal, 1) == c)) {
            scanner_compoundIndex[c] = k;
            while (true) {
                k += 1;
                if ((k > scanner_nCompounds) || (stringchar (scanner_compoundTokens[k].literal, 1) > c)) break; 
            }
        } else {
            scanner_compoundIndex[c] = 0;
        }
    }
}

// Make the keyword tokens from firstkey onward the active keyword set and sort them.
// When firstkey > 1, entries firstkey .. scanner_lastKey of scanner_keywordTokens are
// copied down to the front of the table and scanner_nKeys becomes their count.
// Entries 1 .. scanner_nKeys are then sorted into ascending order.
static void scanner_sortKeywords (const int firstkey)
{
    // Step 1. Slide the active keywords down to the beginning of the table
    if (firstkey > 1) {
        int nactive = 0;
        for (int from = firstkey; from <= scanner_lastKey; from++) {
            nactive += 1;
            scanner_keywordTokens[nactive] = scanner_keywordTokens[from];
        }
        scanner_nKeys = nactive;
    }

    // Step 2. Bubblesort the active keywords, stopping after the first pass with no exchange
    int top = scanner_nKeys;
    bool exchanged = true;

    while ((top >= 2) && exchanged) {
        exchanged = false;

        for (int upper = 2; upper <= top; upper++) {
            const int lower = upper - 1;
            if (scanner_keywordTokens[lower] > scanner_keywordTokens[upper]) {
                const int held = scanner_keywordTokens[lower];
                scanner_keywordTokens[lower] = scanner_keywordTokens[upper];
                scanner_keywordTokens[upper] = held;
                exchanged = true;
            }
        }

        top -= 1;
    }
}

// Link token pattern p into the list of candidate patterns for first character c,
// if the pattern can begin with that character.
//
// The test is made by scanning the one-character input "c" with the pattern in test mode and
// checking whether scanner_inputchar moved. This overwrites the first two characters of
// scanner_inputline and scanner_inputchar; the caller is responsible for saving and
// restoring them.
// Exceeding maxTokenPatternLinks is reported as a limit error.
static void scanner_linkpattern (const char c, const int p)
{
    // If this pattern can begin with character c ...
    lstringchar (scanner_inputline, 1) = c;
    lstringchar (scanner_inputline, 2) = EOS;
    scanner_inputchar = 1;

    if (scanner_scanToken (scanner_tokenPatterns[p].pattern, 1, (scanner_tokenPatterns[p].length), true)) {
        // (just want side effect on inputchar)
    }

    //  ... then we need to link it in as an alternative
    if (scanner_inputchar != 1) {
        scanner_nPatternLinks += 1;

        if (scanner_nPatternLinks >= maxTokenPatternLinks) {    // (sic)
            string message;
            stringprintf (message, "Too many token patterns (links) (> %d)", maxTokenPatternLinks);
            error ("", message, LIMIT_FATAL, 166);
        }

        // The first link made for c is the head of its list
        if (scanner_patternIndex[(unsigned char) c] == 0) {
            scanner_patternIndex[(unsigned char) c] = scanner_nPatternLinks;
        }

        scanner_patternLink[scanner_nPatternLinks] = p;
    }
}

static void scanner_sortTokenPatterns (void) {
    // This routine and linkpattern() use the first two characters 
    // in inputline for temporary pattern tests
    const char saveinputline1 = lstringchar (scanner_inputline, 1);
    const char saveinputline2 = lstringchar (scanner_inputline, 2);
    const int saveinputchar = scanner_inputchar;

    for (int p = 1; p <= scanner_nPatterns; p++) {
        // If this pattern can accept the null string, something's wrong ...
        lstringchar (scanner_inputline, 1) = EOS;
        scanner_inputchar = 1;

        if (scanner_scanToken (scanner_tokenPatterns[p].pattern, 1, (scanner_tokenPatterns[p].length), true)) {
            string message;
            stringprintf (message, "Token pattern for '%s' accepts the null string", *ident_idents[scanner_tokenPatterns[p].name]);
            error ("", message, WARNING, 165);
        }
    }

    // Build the pattern index table
    scanner_nPatternLinks = 0;

    for (int c = 0; c <= 255; c++) {
        scanner_patternIndex[(unsigned char) c] = 0;

        // User token patterns take precedence, but in order specified
        for (int p = scanner_nPredefinedPatterns + 1; p <= scanner_nPatterns; p++) {
            scanner_linkpattern ((unsigned char) c, p);
        }

        // Then predefined patterns
        for (int p = 1; p <= scanner_nPredefinedPatterns; p++) {
            scanner_linkpattern ((unsigned char) c, p);
        }

        if (scanner_patternIndex[(unsigned char) c] != 0) {
            // Mark end of possibilities
            scanner_nPatternLinks += 1;
            scanner_patternLink[scanner_nPatternLinks] = 0;

            // Consolidate lists
            if ((c > EOS) && ((scanner_patternIndex[(unsigned char) (c - 1)]) != 0)) {
                int l1 = scanner_patternIndex[(unsigned char) (c - 1)];
                int l2 = scanner_patternIndex[(unsigned char) c];
                bool same = true;
                while (true) {
                    if ((scanner_patternLink[l1]) != (scanner_patternLink[l2])) {
                        same = false;
                        break;
                    }

                    if (scanner_patternLink[l1] == 0) break;

                    l1 += 1;
                    l2 += 1;
                }

                if (same) {
                    scanner_nPatternLinks = scanner_patternIndex[(unsigned char) c] - 1;
                    scanner_patternIndex[c] = scanner_patternIndex[(unsigned char) (c - 1)];
                }
            }
        }
    }

    // Restore state of inputline after temporary pattern tests
    lstringchar (scanner_inputline, 1) = saveinputline1;
    lstringchar (scanner_inputline, 2) = saveinputline2;
    scanner_inputchar = saveinputchar;
}

// Encode the textual token pattern patternString and install it as token pattern p,
// with the given tree kind and token class name.
//
// The pattern text is translated one character at a time into encodedPattern (1-origin):
//   - \x      becomes the code for x from scanner_patternCodes when x is a known pattern
//             character, LOOKAHEAD when x is ':', and ESCAPE followed by x otherwise
//   - #x, #\x becomes the code for x from scanner_patternNotCodes when x is a known pattern
//             character, NOTLOOKAHEAD when x is ':', and NOT followed by x otherwise
//   - [ ( #[ #(  become CHOICE, SEQUENCE, NOTCHOICE, NOTSEQUENCE followed by a length slot,
//             which is filled in with the length of the enclosed encoding when the matching
//             closing bracket is reached; the closing bracket itself is kept in the encoding
//   - characters flagged in charset_magicP are preceded by ESCAPE
//   - all other characters are copied as is
// Syntax errors in the pattern (trailing \ or #, lookahead inside brackets, unbalanced
// brackets) are reported as fatal errors naming the token class.
static void scanner_setTokenPattern (const int p, const enum treeKindT kind, const string name, const string patternString)
{
    const int patternlength = stringlen (patternString);
    scanner_patternT encodedPattern;

    // Brackets stack, to match [ ] and ( ) in patterns
    struct bracketsT {
        char closebracket;      // the closing bracket expected for this open bracket
        int index;              // position in encodedPattern of the length slot of the group
    };
    // 1-origin [1 .. maxStringLength]
    // (note that the code below addresses the top entry as brackets[bracketsTop - 1])
    struct bracketsT brackets[maxStringLength + 1];
    int bracketsTop = 0;

    int i = 1;      // 1-origin position in patternString
    int j = 1;      // 1-origin position of the next free slot in encodedPattern

    while (true) {
        if (i > patternlength) break;

        char pi = patternString[i - 1];
        int encodedpi = pi;

        // An escape, or a # that does not introduce a negated [ ] or ( ) group
        if ((pi == '\\') 
                || ((pi == '#') && ((patternlength <= i) || ((patternString[(i + 1) - 1] != '[') && (patternString[(i + 1) - 1] != '('))))) {
            if (i == patternlength) {
                string message;
                stringprintf (message, "Syntax error in token pattern for '%s' (\\ or # at end of pattern)", name);
                error ("", message, FATAL, 167);
            }

            if (pi == '\\') {
                // Escaped character - move on to the character after the backslash
                i += 1;
                pi = patternString[i - 1];
                encodedpi = pi;

                int code = EOSPAT;

                // Is it one of the pattern characters?
                // NOTE(review): this lookup indexes scanner_patternChars / scanner_patternCodes with [c],
                // whereas the corresponding lookup in the '#' case below uses [c - 1] for the same
                // loop range - the table declarations are not visible here, confirm which origin is intended
                for (int c = 1; c <= nPatternChars; c++) {
                    if (scanner_patternChars[c] == pi) {
                        code = scanner_patternCodes[c];
                        break;
                    }
                }

                if (code == EOSPAT) {
                    // Not a pattern character - should be an escaped meta-character, quote or lookahead
                    if ((!(charset_metaP[(unsigned char) pi])) && (pi != '"') && (pi != ':')) {
                        string message;
                        stringprintf (message, "Escaped character \\%c in token pattern for '%s' is not a valid token pattern meta-character",
                            (char) pi, name); 
                        error ("", message, WARNING, 169);
                    }

                    if (pi == ':') {
                        // Lookahead
                        if (bracketsTop > 0) {
                            string message;
                            stringprintf (message, "Syntax error in token pattern for '%s' (lookahead test \\: must be a trailing pattern)", name);
                            error ("", message, FATAL, 181);
                        }

                        encodedpi = LOOKAHEAD;
                    } else {
                        // Escaped ordinary character - ESCAPE, then the character itself below
                        encodedPattern[j] = ESCAPE;
                        j += 1;
                    }
                } else {
                    encodedpi = code;
                }

            } else {
                // Negation of a single pattern element - move on to the character after the #
                assert (pi == '#');
                i += 1;
                pi = patternString[i - 1];

                // The negated element may itself be escaped
                if ((pi == '\\') && (i < patternlength)) {
                    i += 1;
                    pi = patternString[i - 1];
                }

                encodedpi = pi;

                scanner_patternCodeT code = EOSPAT;

                // Is it one of the pattern characters?
                // NOTE(review): indexed with [c - 1] here but with [c] in the '\' case above - confirm
                for (int c = 1; c <= nPatternChars; c++) {
                    if ((scanner_patternChars[c - 1]) == pi) {
                        code = scanner_patternNotCodes[c - 1];
                        break;
                    }
                }

                if (code == EOSPAT) {
                    if (pi == ':') {
                        // Negated lookahead #: or #\:
                        if (bracketsTop > 0) {
                            string message;
                            stringprintf (message, "Syntax error in token pattern for '%s' (lookahead test \\: must be a trailing pattern)", name);
                            error ("", message, FATAL, 181);
                        }
                        encodedpi = NOTLOOKAHEAD;
                    } else {
                        // Not of a regular character
                        encodedPattern[j] = NOT;
                        j += 1;
                    }
                } else {
                    encodedpi = code;
                }
            }

        } else if ((pi == '[') || (((pi == '#') && (patternlength > i)) && (patternString[(i + 1) - 1] == '['))) {
            // Start of a choice [...] or negated choice #[...]
            if (pi == '#') {
                i += 1;
                encodedPattern[j] = NOTCHOICE;
            } else {
                encodedPattern[j] = CHOICE;
            }
            j += 1;
            // Remember where the length slot is; EOSPAT is stored there (below) until the group is closed
            bracketsTop += 1;
            brackets[bracketsTop - 1].closebracket = ']';
            brackets[bracketsTop - 1].index = j;
            pi = EOS;
            encodedpi = EOSPAT;

        } else if ((pi == '(') || (((pi == '#') && (patternlength > i)) && (patternString[(i + 1) - 1] == '('))) {
            // Start of a sequence (...) or negated sequence #(...)
            if (pi == '#') {
                i += 1;
                encodedPattern[j] = NOTSEQUENCE;
            } else {
                encodedPattern[j] = SEQUENCE;
            }
            j += 1;
            // Remember where the length slot is; EOSPAT is stored there (below) until the group is closed
            bracketsTop += 1;
            brackets[bracketsTop - 1].closebracket = ')';
            brackets[bracketsTop - 1].index = j;
            pi = EOS;
            encodedpi = EOSPAT;

        } else if ((pi == ']') || (pi == ')')) {
            // End of a group - must match the most recent open bracket
            if ((bracketsTop > 0) && ((brackets[bracketsTop - 1].closebracket) == pi)) {
                // Fill in the length slot with the number of encoded elements inside the brackets
                const int len = (j - (brackets[bracketsTop - 1].index)) - 1;
                assert (encodedPattern[brackets[bracketsTop - 1].index] == EOSPAT);
                encodedPattern[brackets[bracketsTop - 1].index] = len;
                bracketsTop -= 1;
            } else {
                string message;
                stringprintf (message, "Syntax error in token pattern for '%s' (unbalanced () or [])", name);
                error ("", message, FATAL, 170);
            }
            encodedpi = pi;

        // Handle magic characters by automatically escaping them - this enables full 8-bit character set handling - JRC 10.4d
        } else if (charset_magicP[(unsigned char) pi]) {
            encodedPattern[j] = ESCAPE;
            j += 1;
            encodedpi = pi;
        }

        // Store the encoding of this pattern element and move on to the next
        encodedPattern[j] = encodedpi;
        j += 1;
        i += 1;
    }

    // Every open bracket must have been closed
    if (bracketsTop > 0) {
        string message;
        stringprintf (message, "Syntax error in token pattern for '%s' (unbalanced () or [])", name);
        error ("", message, FATAL, 170);
    }

    // Mark the end of the encoded pattern
    encodedPattern[j] = EOSPAT;

    // Special case for newline comments
    if ((stringcmp (name, "comment") == 0) && (encodedPattern[1] == NEWLINE)) {
        options_option[nlcomments_p] = true;
        scanner_patternNLCommentIndex = p;
    }

    // Install the token pattern definition
    struct scanner_patternEntryT *tp = &(scanner_tokenPatterns[p]);
    tp->kind = kind;
    tp->name = ident_install (name, treeKind_id);
    // User token kinds are also recorded by name in kindType
    if ((kind >= firstUserTokenKind) && (kind <= lastUserTokenKind)) {
        kindType[kind] = tp->name;
    }
    structassign (tp->pattern, encodedPattern);
    // The recorded length does not include the EOSPAT end marker
    tp->length = j - 1;
}

// Slots in scanner_tokenPatterns of the predefined token classes the scanner refers to directly -
// zeroed by scanner(), assigned by scanner_defaultTokenPatterns()
static int scanner_stringlitPattern;
static int scanner_charlitPattern;
static int scanner_idPattern;
static int scanner_numberPattern;

// TXL language default keywords
// Entry 0 of the table is an unused placeholder, so the real keywords are 1-origin [1 .. nTxlKeywords].
// nTxlKeywords must equal the number of real entries exactly: any array slot that is declared but 
// has no initializer is zero-filled by C, and scanner_defaultTokenPatterns() would then install it 
// as a keyword with empty text.
#define nTxlKeywords 28
typedef char string12[13];      // room for 12 characters plus the terminator
struct scanner_txlKeywordT {
    enum treeKindT kind;        // kind the keyword text is installed with
    string12 text;              // the keyword text itself
};

static const struct scanner_txlKeywordT scanner_txlKeywords[nTxlKeywords + 1] = {
    {treeKind_undefined, "UNUSED"},
    {treeKind_literal, "["},
    {treeKind_literal, "]"},
    {treeKind_literal, "|"},
    {treeKind_id, "end"},
    {treeKind_id, "keys"},
    {treeKind_id, "define"},
    {treeKind_id, "repeat"},
    {treeKind_id, "list"},
    {treeKind_id, "opt"},
    {treeKind_id, "rule"},
    {treeKind_id, "function"},
    {treeKind_id, "replace"},
    {treeKind_id, "by"},
    {treeKind_id, "match"},
    {treeKind_id, "skipping"},
    {treeKind_id, "construct"},
    {treeKind_id, "deconstruct"},
    {treeKind_id, "where"},
    {treeKind_id, "not"},
    {treeKind_id, "include"},
    {treeKind_id, "comments"},
    {treeKind_id, "compounds"},
    {treeKind_id, "tokens"},
    {treeKind_id, "all"},
    {treeKind_id, "import"},
    {treeKind_id, "export"},
    {treeKind_id, "assert"},
    {treeKind_literal, "..."}
};

// TXL predefined token classes
#define nTxlPatterns 7
typedef char string25[26];
struct scanner_txlPatternT {
    enum treeKindT kind;
    string12 name;
    string25  pattern;
}; 

static const struct scanner_txlPatternT scanner_txlPatterns[nTxlPatterns + 1] = {
    {treeKind_undefined, "UNUSED", "UNUSED"},
    // Default predefined token classes - order matters!
    {treeKind_stringlit, "stringlit", "\"[(\\\\\\c)#\"]*\""},
    {treeKind_charlit, "charlit", "\'[(\\\\\\c)#\']*\'"},
    {treeKind_id, "id", "\\u\\i*"},
    {treeKind_number, "number", "\\d+(.\\d+)?([eE][+-]?\\d+)?"},
    // These two only take effect when -char or -newline is specified
    {treeKind_space, "space", "[ \t]+"},
    {treeKind_newline, "newline", "\n"},
    // This one allows for ignoring input - intentionally undefined to begin with
    {treeKind_empty, "ignore", "\"$%&*/ UNDEFINED /*&%$\""}
};

static void scanner_defaultTokenPatterns (void) {
    // Predefined token classes
    for (int i = 1; i <= nTxlPatterns; i++) {
        const struct scanner_txlPatternT *tp = &(scanner_txlPatterns[i]);
        scanner_setTokenPattern (i, tp->kind, tp->name, tp->pattern);
    }

    // Number of predefined token classes
    scanner_nPatterns = nTxlPatterns;
    scanner_nPredefinedPatterns = scanner_nPatterns;

    // The [id] pattern, which must allow keywords
    scanner_idPattern = 3;

    // The [stringlit] and [charlit] patterns
    scanner_stringlitPattern = 1;
    assert (scanner_stringlitPattern < scanner_idPattern);      // allow for leading letter overrides, e.g., L"foo"
    scanner_charlitPattern = 2;
    assert (scanner_charlitPattern < scanner_idPattern);        // allow for leading letter overrides, e.g., U'foo'

    // The [number] pattern
    scanner_numberPattern = 4;
    assert (scanner_idPattern < scanner_numberPattern);         // allow for leading digit overrides (e.g., 3a)

    // Link in the default patterns
    scanner_sortTokenPatterns ();

    // Default TXL keywords
    for (int i = 1; i <= nTxlKeywords; i++) {
        scanner_keywordTokens[i] = ident_install (scanner_txlKeywords[i].text, scanner_txlKeywords[i].kind);
    }

    // Number of TXL keywords
    scanner_nKeys = nTxlKeywords;
    scanner_nTxlKeys = nTxlKeywords;
    scanner_lastKey = nTxlKeywords;

    // Link in the default keywords
    scanner_sortKeywords (1);
}

// Check that the next word in the input is expectedword, the name of the section being ended.
// Skips separators, consumes the following run of identifier characters,
// and reports fatal syntax error 172 if that word is not the one expected.
static void scanner_expectend (const string expectedword)
{
    scanner_skipSeparators ();

    // Consume the run of identifier characters starting here
    const int wordstart = scanner_inputchar;
    while (charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
        scanner_inputchar += 1;
    }

    string gotword;
    lsubstring (gotword, scanner_inputline, wordstart, (scanner_inputchar - 1));

    // Nothing more to do if it is the word we wanted
    if (stringcmp (gotword, expectedword) == 0) {
        return;
    }

    string context, message;
    stringprintf (context, "line %d of %s", scanner_linenum, fileNames[scanner_filenum]);
    stringprintf (message, "Syntax error - expected 'end %s', got 'end %s'", expectedword, gotword);
    error (context, message, FATAL, 172);
}

// True if every character of id is an identifier character according to charset_idP
// (so also true for an empty string).
static bool scanner_isId (const string id)
{
    const int idlength = stringlen (id);

    for (int k = 0; k < idlength; k++) {
        if (!(charset_idP[(unsigned char) id[k]])) {
            return (false);
        }
    }

    return (true);
}

static void scanner_setCompoundToken (const string literal)
{
    if (stringlen (literal) == 1) {
        // It's already a literal!
        return;
    }

    if (scanner_nCompounds == maxCompoundTokens) {
        string message;
        stringprintf (message, "Too many compound literals (> %d)", maxCompoundTokens);
        error ("", message, LIMIT_FATAL, 173);
    }

    scanner_nCompounds += 1;
    stringcpy (scanner_compoundTokens[scanner_nCompounds].literal, literal);
    scanner_compoundTokens[scanner_nCompounds].length = stringlen (literal);
}

// Append keyword to the keyword table as an [id] token, lower-cased first when the case_p option is set.
// Exceeding maxKeys is limit error 174.
static void scanner_setKeyword (const string keyword)
{
    if (scanner_lastKey == maxKeys) {
        string message;
        stringprintf (message, "Too many keywords (> %d)", maxKeys);
        error ("", message, LIMIT_FATAL, 174);
    }

    scanner_lastKey += 1;

    // Under case_p the keyword is installed in lower case, otherwise exactly as given
    string lowerKeyword;
    const char *keytext = keyword;

    if (options_option[case_p]) {
        stringcpy (lowerKeyword, keyword);
        stringtolower (lowerKeyword);
        keytext = lowerKeyword;
    }

    scanner_keywordTokens[scanner_lastKey] = ident_install (keytext, treeKind_id);
}

// Process a 'compounds' section: every separator-delimited word up to 'end' is entered 
// as a compound literal token, then 'end compounds' is checked and the compounds are re-sorted.
static void scanner_processCompoundTokens (void) {
    for (;;) {
        scanner_skipSeparators ();

        // A quote in front of the literal is simply dropped
        if (lstringchar (scanner_inputline, scanner_inputchar) == '\'') {
            scanner_inputchar += 1;
        }

        if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) break;

        // The literal extends up to the next separator
        const int literalstart = scanner_inputchar;
        while (!(charset_separatorP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) {
            scanner_inputchar += 1;
        }

        string literal;
        lsubstring (literal, scanner_inputline, literalstart, (scanner_inputchar - 1));

        // 'end' closes the section
        if (stringcmp (literal, "end") == 0) break;

        scanner_setCompoundToken (literal);
    }

    scanner_expectend ("compounds");
    scanner_sortCompoundTokens ();
}

// Process a 'comments' section: each entry is a comment start bracket, optionally followed 
// by a matching end bracket. The start bracket is also entered as a keyword (if it is 
// all identifier characters) or as a compound token (if longer than one character).
// An entry with no end bracket gets NOT_FOUND as its end.
static void scanner_processCommentBrackets (void) {
    for (;;) {
        scanner_skipSeparators ();

        // A quote in front of the start bracket is simply dropped
        if (lstringchar (scanner_inputline, scanner_inputchar) == '\'') {
            scanner_inputchar += 1;
        }

        if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) break;

        // The start bracket extends up to the next separator
        const int openstart = scanner_inputchar;
        while (!(charset_separatorP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) {
            scanner_inputchar += 1;
        }

        string commentbracket;
        lsubstring (commentbracket, scanner_inputline, openstart, (scanner_inputchar - 1));

        // 'end' closes the section
        if (stringcmp (commentbracket, "end") == 0) break;

        // The scanner must be able to recognize the start bracket as a single token
        if (scanner_isId (commentbracket)) {
            scanner_setKeyword (commentbracket);
        } else if (stringlen (commentbracket) > 1) {
            scanner_setCompoundToken (commentbracket);
        }

        if (scanner_nComments == maxCommentTokens) {
            string message;
            stringprintf (message, "Too many comment conventions (> %d)", maxCommentTokens);
            error ("", message, LIMIT_FATAL, 175);
        }

        scanner_nComments += 1;
        scanner_commentStart[scanner_nComments] = ident_install (commentbracket, treeKind_literal);

        // Skip the white space following the start bracket
        while (charset_spaceP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
            scanner_inputchar += 1;
        }

        // An end bracket is present if there is something other than end of line, a % or end of file next
        bool hasEndBracket = false;
        char next = lstringchar (scanner_inputline, scanner_inputchar);

        if ((next != EOS) && (next != '%')) {
            if (next == '\'') {
                // Quoted comment bracket
                scanner_inputchar += 1;
                next = lstringchar (scanner_inputline, scanner_inputchar);
            }
            hasEndBracket = (next != EOF_) && (next != EOS);
        }

        if (hasEndBracket) {
            // The end bracket extends up to the next separator
            const int closestart = scanner_inputchar;
            while (!(charset_separatorP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) {
                scanner_inputchar += 1;
            }
            lsubstring (commentbracket, scanner_inputline, closestart, (scanner_inputchar - 1));
            scanner_commentEnd[scanner_nComments] = ident_install (commentbracket, treeKind_literal);
        } else {
            scanner_commentEnd[scanner_nComments] = NOT_FOUND;
        }
    }

    scanner_expectend ("comments");
    scanner_sortCompoundTokens ();
}

// Process a 'keys' section: every separator-delimited word up to an unquoted 'end' 
// is entered as a keyword, then 'end keys' is checked.
static void scanner_processKeywordTokens (void) {
    for (;;) {
        scanner_skipSeparators ();

        // Note a leading quote, since a quoted 'end is a keyword and not the end of the section
        const bool quoted = (lstringchar (scanner_inputline, scanner_inputchar) == '\'');
        if (quoted) {
            scanner_inputchar += 1;
        }

        if (lstringchar (scanner_inputline, scanner_inputchar) == EOF_) break;

        // The keyword extends up to the next separator
        const int keystart = scanner_inputchar;
        while (!(charset_separatorP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) {
            scanner_inputchar += 1;
        }

        string key;
        lsubstring (key, scanner_inputline, keystart, (scanner_inputchar - 1));

        if ((!quoted) && (stringcmp (key, "end") == 0)) break;

        scanner_setKeyword (key);
    }

    scanner_expectend ("keys");

    // The new keywords are deliberately not linked in with sortKeywords here.
    // Keywords specified by the user do not take effect until we are actually 
    // scanning object language source, such as tokenPatterns, replacements, or input,
    // so linking must wait until the entire TXL program has been scanned.
}

static void scanner_processTokenPatterns (void) {
    // User defined tokens
    string      name;
    stringcpy (name, "");

    while (true) {
        scanner_skipSeparators ();

        if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOF_) break;

        if (((lstringchar (scanner_inputline, scanner_inputchar) == '|') || (lstringchar (scanner_inputline, scanner_inputchar) == '+')) 
                && (stringcmp (name, "") != 0)) {
        } else {
            const int startchar = scanner_inputchar;
            while (true) {
                if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                scanner_inputchar += 1;
            }
            lsubstring (name, scanner_inputline, startchar, (scanner_inputchar - 1));

            if (stringcmp (name, "") == 0) {
                string message;
                stringprintf (message, "Syntax error in token pattern definition (expected token name, got '%s'", 
                    &(lstringchar (scanner_inputline, startchar)));
                error ("", message, FATAL, 176);
            }
        }

        if (stringcmp (name, "end") == 0) break;

        scanner_skipSeparators ();

        if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOF_) break;

        // See if we are extending an exisiting token
        bool extension = false;

        // Allow elipsis for consistency with extended defines
        if (((lstringchar (scanner_inputline, scanner_inputchar) == '.') && (lstringchar (scanner_inputline, scanner_inputchar + 1) == '.')) 
                && (lstringchar (scanner_inputline, scanner_inputchar + 2) == '.')) {
            scanner_inputchar += 3;
            scanner_skipSeparators ();
        }

        // (+ allowed for backward compatibility)
        if ((lstringchar (scanner_inputline, scanner_inputchar) == '|') || (lstringchar (scanner_inputline, scanner_inputchar) == '+')) {
            extension = 1;
            scanner_inputchar += 1;
            scanner_skipSeparators ();

            if ((lstringchar (scanner_inputline, scanner_inputchar)) == EOF_) break;
        }

        const int startchar = scanner_inputchar;

        if ((lstringchar (scanner_inputline, scanner_inputchar)) == '"') {
            scanner_inputchar += 1;
            while (true) {
                if (lstringchar (scanner_inputline, scanner_inputchar) == '"') break;
                if (lstringchar (scanner_inputline, scanner_inputchar) == EOS) break;
                scanner_inputchar += 1;
                if ((lstringchar (scanner_inputline, scanner_inputchar - 1) == '\\') && (lstringchar (scanner_inputline, scanner_inputchar) != EOS)) {
                    scanner_inputchar += 1;
                }
            }

            if (lstringchar (scanner_inputline, scanner_inputchar) == '"') {
                scanner_inputchar += 1;
            }
        }

        string patternString;
        lsubstring (patternString, scanner_inputline, startchar, (scanner_inputchar - 1));

        if (stringcmp (patternString, "\"\"") == 0) {
            // Intentially undefined token 
            stringcpy (patternString, "\"$%&*/ UNDEFINED /*&%$\"");
        }

        if (((stringlen (patternString) < 3) || (stringchar (patternString, 1) != '"')) || (stringchar (patternString, stringlen (patternString)) != '"')) {
            string message;
            stringprintf (message, "Syntax error in token pattern definition (expected pattern string, got '%s')", 
                &(lstringchar (scanner_inputline, startchar)));
            error ("", message, FATAL, 177);
        }

        substring (patternString, patternString, 2, stringlen (patternString) -1);

        // It is either an override, an extension, or a new token
        int newp = 0;
        enum treeKindT kind = treeKind_undefined;

        // See if it is an override or an extension
        for (int p = 1; p <= scanner_nPatterns; p++) {
            if (stringcmp (*ident_idents[scanner_tokenPatterns[p].name], name) == 0) {
                if (!extension) {
                    newp = p;
                }
                kind = scanner_tokenPatterns[p].kind;
                break;
            }
        }

        // Possibly an override or extension of comments
        if (stringcmp (name, "comment") == 0) {
            kind = treeKind_comment;
        }

        // May need a new slot
        if (newp == 0) {
            if (scanner_nPatterns == maxTokenPatterns) {
                string message;
                stringprintf (message, "Too many user-defined token patterns (> %d)", maxTokenPatterns);
                error ("", message, LIMIT_FATAL, 178);
            }

            scanner_nPatterns += 1;
            newp = scanner_nPatterns;
        }

        // May need a new kind
        if (kind == treeKind_undefined) {
            if (scanner_nextUserTokenKind == lastUserTokenKind) {
                error ("", "Too many user-defined token kinds (>30)", LIMIT_FATAL, 179);
            }

            kind = scanner_nextUserTokenKind;
            const int nextKind = (int) scanner_nextUserTokenKind + 1;
            scanner_nextUserTokenKind = (enum treeKindT) nextKind;
        }

        // Automatically sequence any two-byte Unicodes
        int i = 1;
        while (true) {
            if (i >= stringlen (patternString)) break;        // (sic)

            const char pi = patternString[i - 1];

            if (((((unsigned char) pi >= UNICODEA) && ((unsigned char) pi <= UNICODEN)) || ((unsigned char) pi == UNICODEX)) 
                    && ((i == 1) || (patternString[(i - 1) - 1] != '('))) { 
                // Automatically convert to two-character sequence since we process input by byte
                assert (stringlen (patternString) > i);
                string pre_unicode; substring (pre_unicode, patternString, 1, (i - 1));
                string unicode; substring (unicode, patternString, i, (i + 1));
                string post_unicode; substring (post_unicode, patternString, (i + 2), stringlen (patternString));
                stringprintf (patternString, "%s(%s)%s", pre_unicode, unicode, post_unicode);
                i += 4;
            } else {
                i += 1;
            }
        }

        // Fill in the entry
        scanner_setTokenPattern (newp, kind, name, patternString);

        // If the new entry overrides [id], check that it is still valid
        if ((newp == scanner_idPattern) && (!extension)) {
            stringcpy (scanner_inputline, "function ");
            scanner_inputchar = 1;
            const bool idscan = scanner_scanToken (scanner_tokenPatterns[scanner_idPattern].pattern, 1, 
                scanner_tokenPatterns[scanner_idPattern].length, false);
            if (!(idscan && (scanner_inputchar == (stringlen ("function") + 1)))) {
                error ("", "Token pattern for [id] does not allow TXL keywords", FATAL, 180);
            }
        }
    }

    scanner_expectend ("tokens");
    scanner_sortTokenPatterns ();
}

// Scan a whole input into the inputTokens array, ending the list with an empty token.
//   fileNameOrText - handed to scanner_openFile() as the input to scan
//   isFile         - true => an input file to open, false => a single string to scan (for [parse])
//   isTxlSource    - true when the input is a TXL program itself, in which case the default token 
//                    patterns and keywords are installed first, and the keys, compounds, comments,
//                    tokens and include statements are processed as they are met
// Each token is recognized in three steps: as a compound literal, else by a token pattern,
// else as a single character literal.
void scanner_tokenize (const string fileNameOrText, const bool isFile, const bool isTxlSource)
{
    // Text buffer shared by all the places below that build a literal token's text
    string tokentext;

    // What are we tokenizing?
    scanner_fileInput = isFile;         // true => an input file to open, false => a single string to scan (for [parse])
    scanner_txlSource = isTxlSource;    // Scanning a TXL program itself?
    scanner_warnedLines = false;

    // Install the default token patterns and keywords when scanning a TXL program
    if (scanner_txlSource) {
        scanner_defaultTokenPatterns ();
    }

    // [pragma "-id"] may change the leading character maps
    if (options_updatedChars) {
        scanner_sortTokenPatterns ();
    }

    // If newlines are tokens, they aren't white space
    if ((options_option[newline_p]) && (!scanner_txlSource)) {
        charset_addSpaceChar ('\n', false);
        charset_addSpaceChar ('\r', false);
    } else {
        // in case we're dynamically switching 
        charset_addSpaceChar ('\n', true);
        charset_addSpaceChar ('\r', true);
    }

    // Open the file to scan and tokenize
    scanner_openFile (fileNameOrText);

    // Empty the array of scanned tokens 
    lastTokenIndex = 0;

    // TXL programs and source files need to be scanned specially due to their multi-language nature
    const bool processingTxl = scanner_txlSource || options_option[txl_p];

    // Keep track of the token and previous token as we scan
    tokenT token = NOT_FOUND;
    tokenT previoustoken = NOT_FOUND;
    tokenT rawtoken = NOT_FOUND;

    // Each iteration scans one token
    while (true) {

        // Skip white space and comments
        while (true) {
            scanner_skipSeparators ();
            if (lstringchar (scanner_inputline, 1) != EOF_) break;

            // If we hit the end of an input file, check we're not crossing file boundaries
            scanner_synchronizePreprocessor ();

            // If we're at the end of an include file, time to continue the previous file
            if (scanner_includeDepth == 0) break;
            scanner_PopInclude ();
        }

        // We're done when we hit EOF on the main input file
        assert ((lstringchar (scanner_inputline, 1) != EOF_) || (scanner_includeDepth == 0));
        if (lstringchar (scanner_inputline, 1) == EOF_) break;

        // Otherwise, there is more text left to scan
        enum treeKindT kind = treeKind_undefined;
        int startchar = scanner_inputchar;

        // Keep track of previous token, for context -
        // a quote affects only the one token that follows it
        if (previoustoken == quote_T) {
            previoustoken = NOT_FOUND;
        } else {
            previoustoken = token;
        }

        // To begin, we don't know what the next token is
        token = NOT_FOUND;
        rawtoken = token;

        // Step 1. See if it is a defined compound literal
        const int litindex = scanner_compoundIndex[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)];

        if ((litindex != 0) && ((!processingTxl) || 
                    // Make sure that we don't mistake a TXL ... for a user token
                    ((((lstringchar (scanner_inputline, scanner_inputchar) != '.') 
                        || (lstringchar (scanner_inputline, scanner_inputchar + 1) != '.')
                        || (lstringchar (scanner_inputline, scanner_inputchar + 2) != '.')) 
                    || (previoustoken == quote_T))))) {

            if (scanner_scanCompoundLiteral (litindex)) {
                // It is a compound literal token
                kind = treeKind_literal;
                // string tokentext;
                lsubstring (tokentext, scanner_inputline, startchar, (scanner_inputchar - 1));
                token = ident_install (tokentext, treeKind_literal);
                rawtoken = token;
            }
        }

        // Step 2. If not, see if it matches a token pattern
        if (kind == treeKind_undefined) {
            assert (scanner_inputchar == startchar);

            // Handle quotes specially
            if ((processingTxl && ((lstringchar (scanner_inputline, scanner_inputchar)) == '\'')) && (previoustoken != quote_T)) {
                // The first quote is always simply itself in TXL

            } else {
                // Perhaps it is an id, number, string, or user token
                int patindex = scanner_patternIndex[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)];

                if (patindex != 0) {
                    // Try each candidate pattern linked for this leading character in turn, until a zero link
                    while (true) {
                        const struct scanner_patternEntryT *pp = &(scanner_tokenPatterns[scanner_patternLink[patindex]]);

                        if (scanner_scanToken (pp->pattern, 1, (pp->length), false)) {
                            // Aha! Got one
                            kind = pp->kind;

                            // In TXL itself, user defined tokens must be quoted
                            if (scanner_txlSource && (((kind >= firstUserTokenKind) && (previoustoken != quote_T)) 
                                    || ((pp->name) == ignore_T))) {

                                // Back up, if we rejected an unquoted user token
                                kind = treeKind_undefined;
                                scanner_inputchar = startchar;

                            } else {
                                // Collect and classify the token's text
                                const int endchar = scanner_inputchar - 1;

                                // Fast in place substring to hash arbitrary length token text
                                const char oldchar = lstringchar (scanner_inputline, endchar + 1);
                                lstringchar (scanner_inputline, endchar + 1) = EOS;     // (hup!)

                                // If working in case insensitive mode, need original raw text also
                                if (options_option[case_p]) {
                                    rawtoken = ident_install (&lstringchar (scanner_inputline, startchar), treeKind_id);
                                }

                                // Normalize if lower, upper, or case insensitive -
                                // but never string or character literals, nor unquoted text of a TXL program
                                if (((((options_option[upper_p]) || (options_option[lower_p])) || (options_option[case_p])) 
                                        && ((kind != treeKind_charlit) && (kind != treeKind_stringlit))) 
                                        && (!(scanner_txlSource && (previoustoken != quote_T)))) {
                                    if (options_option[upper_p]) {
                                        lstringtoupper (&(lstringchar (scanner_inputline, startchar)));
                                    } else {
                                        assert (options_option[lower_p] || options_option[case_p]);
                                        lstringtolower (&(lstringchar (scanner_inputline, startchar)));
                                    }
                                }

                                token = ident_install (&lstringchar (scanner_inputline, startchar), treeKind_id);

                                // If not case insensitive, original == normalized
                                if (!(options_option[case_p])) {
                                    rawtoken = token;
                                }

                                // Undo fast in place substring to hash arbitrary length token text
                                lstringchar (scanner_inputline, endchar + 1) = oldchar;

                                // If it is an id then it might be a keyword
                                if ((!processingTxl) || (previoustoken != quote_T)) {
                                    if (scanner_keyP (token)) {
                                        kind = treeKind_key;
                                    }
                                }

                                break;
                            }
                        }

                        // Back up and try next possibility
                        assert (scanner_inputchar == startchar);
                        patindex += 1;

                        if ((scanner_patternLink[patindex]) == 0) break;
                    }
                }
            }
        }

        // Step 3. If it isn't a compound and doesn't match any token pattern,
        // it must be a single character token
        if (kind == treeKind_undefined) {
            assert (scanner_inputchar == startchar);

            kind = treeKind_literal;
            // string tokentext;
            stringchar (tokentext, 1) = lstringchar (scanner_inputline, scanner_inputchar);
            stringchar (tokentext, 2) = EOS;
            token = ident_install (tokentext, treeKind_literal);
            rawtoken = token;

            // A newline taken as a token still counts as a source line
            if ((lstringchar (scanner_inputline, scanner_inputchar)) == '\n') {
                scanner_linenum += 1;
            }

            scanner_inputchar += 1;

            // Warn if not expecting a quote token 
            if ((lstringchar (scanner_inputline, startchar) == '"') || ((lstringchar (scanner_inputline, startchar) == '\'') && (!processingTxl))) {
                if ((options_option[verbose_p]) 
                        && (((lstringchar (scanner_inputline, startchar) == '"') 
                            && (scanner_tokenPatterns[scanner_stringlitPattern].pattern[1] == '"')) 
                        || ((lstringchar (scanner_inputline, startchar) == '\'') 
                            && (scanner_tokenPatterns[scanner_charlitPattern].pattern[1] == '\'')))) {
                    error ("", "Unmatched opening quote accepted as literal token", WARNING, 168);
                }
            }

            // Funny object language or TXL keyword
            if (scanner_keyP (token)) {
                kind = treeKind_key;
            }

            // If we are processing TXL, it might be a TXL special keyword
            if (processingTxl) {
                if (scanner_keyP (token)) {
                    // A TXL artificial keyword
                    kind = treeKind_key;

                } else if ((token == quote_T) && (lstringchar (scanner_inputline, scanner_inputchar) == '%')) {
                    // A quoted TXL comment character - could be a compound
                    const int pcindex = scanner_compoundIndex['%'];
                    const int pcchar = scanner_inputchar;

                    if ((pcindex != 0) && scanner_scanCompoundLiteral (pcindex)) {
                        // Quoted compound literal
                        kind = treeKind_literal;
                        // string tokentext;
                        lsubstring (tokentext, scanner_inputline, pcchar, (scanner_inputchar - 1));
                        token = ident_install (tokentext, treeKind_literal);
                        rawtoken = token;
                    } else {
                        // Quoted TXL comment character
                        previoustoken = quote_T;
                        token = ident_install ("%", treeKind_literal);
                        rawtoken = token;
                        startchar = scanner_inputchar;  // remember beginning of % in case a user comment
                        scanner_inputchar += 1;
                    }

                } else if (token == underscore_T) {
                    // TXL anonymous variable that the object language doesn't think is an [id]
                    kind = treeKind_id;

                } else {
                    if ((token == dot_T) && (previoustoken != quote_T) && (lstringchar (scanner_inputline, scanner_inputchar) == '.') 
                            && (lstringchar (scanner_inputline, scanner_inputchar + 1) == '.')) {
                        // TXL special define extension marker '...'
                        token = dotDotDot_T;
                        rawtoken = token;
                        kind = treeKind_key;
                        scanner_inputchar += 2;
                    }
                }

                // If we are actually processing a TXL program,
                // it might be a special TXL rule name
                if (scanner_txlSource && (previoustoken == openbracket_T)) {
                    const char tokenchar = lstringchar (scanner_inputline, startchar);
                    if (((lstringchar (scanner_inputline, scanner_inputchar) == '=') && 
                            ((tokenchar == '~') || (tokenchar == '<') || (tokenchar == '>')))
                        || ((lstringchar (scanner_inputline, scanner_inputchar) == '/') && 
                            (tokenchar == '^'))) {
                        // ~=, <=, >= or ^/ rule call in TXL
                        scanner_inputchar += 1;
                        // string tokentext;
                        lsubstring (tokentext, scanner_inputline, startchar, (scanner_inputchar - 1));
                        token = ident_install (tokentext, treeKind_literal);
                        rawtoken = token;
                    } else if (tokenchar == '?') {
                        // A query rule call in TXL
                        scanner_skipSeparators ();
                        startchar = scanner_inputchar - 1;
                        lstringchar (scanner_inputline, startchar) = '?';         // make sure the ? rule name has no spaces in it
                        if (charset_alphaidP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)]) {
                            while (true) {
                                if (!(charset_idP[(unsigned char) lstringchar (scanner_inputline, scanner_inputchar)])) break;
                                scanner_inputchar += 1;
                            }
                            // string tokentext;
                            lsubstring (tokentext, scanner_inputline, startchar, (scanner_inputchar - 1));
                            token = ident_install (tokentext, treeKind_id);
                            rawtoken = token;
                            kind = treeKind_id;
                        }
                    }
                }
            }
        }

        // If this is a TXL program, we have to handle the special sections
        if (scanner_txlSource) {
            if ((kind == treeKind_key) && (previoustoken != quote_T)) {

                // Each of these consumes its whole statement from the input, and yields no token itself
                if (token == keys_T) {
                    scanner_processKeywordTokens ();
                    kind = treeKind_undefined;

                } else if (token == compounds_T) {
                    scanner_processCompoundTokens ();
                    kind = treeKind_undefined;

                } else if (token == comments_T) {
                    scanner_processCommentBrackets ();
                    kind = treeKind_undefined;

                } else if (token == tokens_T) {
                    scanner_processTokenPatterns ();
                    kind = treeKind_undefined;

                } else if (token == include_T) {
                    scanner_PushInclude ();
                    kind = treeKind_undefined;
                }

            } else if ((previoustoken == quote_T) && ((kind == treeKind_literal) || (kind == treeKind_key))) {
                // A quoted object language comment
                const int comindex = scanner_commentindex (token);
                if (comindex != 0) {
                    scanner_scanComment (startchar, comindex);
                    kind = treeKind_undefined;
                }
            }

        } else {
            // Processing object source - check for user comment
            if (((kind == treeKind_literal) || (kind == treeKind_key)) && (!((previoustoken == quote_T) && processingTxl))) {
                const int comindex = scanner_commentindex (token);
                if (comindex != 0) {
                    scanner_scanComment (startchar, comindex);
                    kind = treeKind_undefined;
                }
            } else if (kind == treeKind_comment) {
                // User pattern for comment - kept as a token only if comment_token_p is set
                if (!(options_option[comment_token_p])) {
                    kind = treeKind_undefined;
                }
            }
        }

        // If we still have something, add it to the tokens list
        if ((kind != treeKind_undefined) && (kind != treeKind_empty)) {       // empty is an ignored token
            scanner_installToken (kind, token, rawtoken);
        }
    }

    // Keywords from the keys section need sorting for efficiency
    if (scanner_txlSource) {
        scanner_sortKeywords (scanner_nTxlKeys + 1);
    }

    // End the token list with an empty token to mark end of file
    lastTokenIndex += 1;
    struct tokenTableT *inputToken = &(inputTokens[lastTokenIndex]);
    inputToken->token = empty_T;
    inputToken->rawtoken = empty_T;
    inputToken->kind = treeKind_empty;
    inputToken->linenum = (scanner_filenum * maxLines) + scanner_linenum;   // file number and line number packed into one value

    // Close the input file
    scanner_closeFile ();
}

// Initialization
// Resets all of the scanner's tables and state to empty, and allocates the input line buffer.
void scanner (void) {
    // Element 0 of each of the scanner's tables is a placeholder - mark them all unused
    scanner_compoundTokens[0].length = UNUSED;
    scanner_commentStart[0] = UNUSED;
    scanner_commentEnd[0] = UNUSED;
    scanner_tokenPatterns[0].name = UNUSED;
    scanner_patternLink[0] = UNUSED;
    scanner_keywordTokens[0] = UNUSED;
    scanner_ifdefStack[0] = UNUSED;
    scanner_ifdefFile[0] = UNUSED;

    // TXL sections - no compounds, comment brackets, token patterns or keywords yet
    scanner_nCompounds = 0;
    scanner_nComments = 0;
    scanner_nPatterns = 0;
    scanner_nPredefinedPatterns = 0;
    scanner_nPatternLinks = 0;
    scanner_lastKey = 0;
    scanner_nTxlKeys = 0;
    scanner_nKeys = 0;

    // User token kinds are handed out starting from the first one
    scanner_nextUserTokenKind = firstUserTokenKind;

    // Special newline comment handling
    scanner_patternNLCommentIndex = 0;

    // Predefined token patterns - filled in by defaultTokenPatterns()
    scanner_stringlitPattern = 0;
    scanner_charlitPattern = 0;
    scanner_idPattern = 0;
    scanner_numberPattern = 0;

    // Ifdef stack is empty
    scanner_ifdefTop = 0;

    // Input file and include handling
    scanner_inputStream = tfstdin;
    scanner_includeDepth = 0;
    stringcpy (scanner_sourceFileDirectory, "");

    // Used to reduce redundant error messages
    scanner_warnedLines = false;

    // Use variable buffer size, to force dynamic allocation for efficiency
    scanner_lineBufferSize = maxLineLength*inputBufferFactor + maxStringLength + 1;     // for string type cheats
    arrayalloc (scanner_lineBufferSize, char, scanner_inputline);

    // Initialize input processing to empty
    lstringchar (scanner_inputline, 1) = EOS;
    stringcpy (scanner_nextinputline, "");
    scanner_nextlength = 0;

    assert (charset_upperP[(unsigned char) 138]);
}
